Message ID | 20171006122853.16310-4-bmt@zurich.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
On Fri, Oct 06, 2017 at 08:28:43AM -0400, Bernard Metzler wrote: > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw_main.c | 752 +++++++++++++++++++++++++++++++++++ > 1 file changed, 752 insertions(+) > create mode 100644 drivers/infiniband/sw/siw/siw_main.c > > diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c > new file mode 100644 > index 000000000000..5a054c6becaa > --- /dev/null > +++ b/drivers/infiniband/sw/siw/siw_main.c > @@ -0,0 +1,752 @@ > +/* > + * Software iWARP device driver for Linux > + * > + * Authors: Bernard Metzler <bmt@zurich.ibm.com> > + * > + * Copyright (c) 2008-2017, IBM Corporation > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * > + * - Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * - Neither the name of IBM nor the names of its contributors may be > + * used to endorse or promote products derived from this software without > + * specific prior written permission. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#include <linux/init.h> > +#include <linux/errno.h> > +#include <linux/netdevice.h> > +#include <linux/inetdevice.h> > +#include <net/net_namespace.h> > +#include <linux/rtnetlink.h> > +#include <linux/if_arp.h> > +#include <linux/list.h> > +#include <linux/kernel.h> > +#include <linux/dma-mapping.h> > + > +#include <rdma/ib_verbs.h> > +#include <rdma/ib_smi.h> > +#include <rdma/ib_user_verbs.h> > + > +#include "siw.h" > +#include "siw_obj.h" > +#include "siw_cm.h" > +#include "siw_verbs.h" > +#include <linux/kthread.h> > + > + > +MODULE_AUTHOR("Bernard Metzler"); > +MODULE_DESCRIPTION("Software iWARP Driver"); > +MODULE_LICENSE("Dual BSD/GPL"); > +MODULE_VERSION("0.2"); No module versions please, it is useless. > + > +#define SIW_MAX_IF 12 > +static int if_cnt; > +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = '\0'}; > +module_param_array(iface_list, charp, &if_cnt, 0444); > +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if present"); > + > +static bool loopback_enabled = 1; > +module_param(loopback_enabled, bool, 0644); > +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); > + > +LIST_HEAD(siw_devlist); > + > +static int cpu_cnt; > +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = '\0'}; > +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); > +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be bound to"); No module parameters please. 
> + > +int default_tx_cpu = -1; > +struct task_struct *qp_tx_thread[MAX_CPU]; > +struct crypto_shash *siw_crypto_shash; > + > +static ssize_t show_sw_version(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct siw_dev *sdev = container_of(dev, struct siw_dev, ofa_dev.dev); Please remove "ofa_*" from this code, upstream has nothing to do with OFA. > + > + return sprintf(buf, "%x\n", sdev->attrs.version); > +} > + > +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL); Why do you need? Does "rdma dev" work for you? > + > +static struct device_attribute *siw_dev_attributes[] = { > + &dev_attr_sw_version > +}; > + > +static void siw_device_release(struct device *dev) > +{ > + pr_info("%s device released\n", dev_name(dev)); > +} > + > +static struct device siw_generic_dma_device = { > + .dma_ops = &siw_dma_generic_ops, > + .init_name = "software-rdma-v2", > + .release = siw_device_release > +}; > + > +static struct bus_type siw_bus = { > + .name = "siw", > +}; > + > +static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int mask, > + struct ib_port_modify *props) > +{ > + return -EOPNOTSUPP; > +} The proper error code is ENOSYS and if the function is not implemented, it shouldn't be set. > + > + > +static void siw_device_register(struct siw_dev *sdev) > +{ > + struct ib_device *ofa_dev = &sdev->ofa_dev; It is Linux kernel code and not OFED. 
> + int rv, i; > + static int dev_id = 1; > + > + rv = ib_register_device(ofa_dev, NULL); > + if (rv) { > + dprint(DBG_DM|DBG_ON, " %s: ib register error: rv=%d\n", > + ofa_dev->name, rv); > + return; > + } > + > + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { > + rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); > + if (rv) { > + dprint(DBG_DM|DBG_ON, " %s: create file error: rv=%d\n", > + ofa_dev->name, rv); > + ib_unregister_device(ofa_dev); > + return; > + } > + } > + siw_debugfs_add_device(sdev); > + > + sdev->attrs.vendor_part_id = dev_id++; > + > + dprint(DBG_DM, ": '%s' at '%s', HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", > + ofa_dev->name, sdev->netdev->name, > + *(u8 *)sdev->netdev->dev_addr, > + *((u8 *)sdev->netdev->dev_addr + 1), > + *((u8 *)sdev->netdev->dev_addr + 2), > + *((u8 *)sdev->netdev->dev_addr + 3), > + *((u8 *)sdev->netdev->dev_addr + 4), > + *((u8 *)sdev->netdev->dev_addr + 5)); > + > + sdev->is_registered = 1; > +} > + > +static void siw_device_deregister(struct siw_dev *sdev) > +{ > + int i; > + > + siw_debugfs_del_device(sdev); > + > + if (sdev->is_registered) { > + > + dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, > + sdev->netdev->name); > + > + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) > + device_remove_file(&sdev->ofa_dev.dev, > + siw_dev_attributes[i]); > + > + ib_unregister_device(&sdev->ofa_dev); > + } > + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || > + atomic_read(&sdev->num_mem) || atomic_read(&sdev->num_cep) || > + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || > + atomic_read(&sdev->num_pd)) { > + pr_warn("SIW at %s: orphaned resources!\n", sdev->netdev->name); > + pr_warn("CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD %d\n", > + atomic_read(&sdev->num_ctx), > + atomic_read(&sdev->num_srq), > + atomic_read(&sdev->num_qp), > + atomic_read(&sdev->num_cq), > + atomic_read(&sdev->num_mem), > + atomic_read(&sdev->num_cep), > + 
atomic_read(&sdev->num_pd)); > + } > + i = 0; > + > + while (!list_empty(&sdev->cep_list)) { > + struct siw_cep *cep = list_entry(sdev->cep_list.next, > + struct siw_cep, devq); > + list_del(&cep->devq); > + dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", > + cep, cep->state); > + kfree(cep); > + i++; > + } > + if (i) > + pr_warn("%s: free'd %d CEPs\n", __func__, i); > + > + sdev->is_registered = 0; > +} > + > +static void siw_device_destroy(struct siw_dev *sdev) > +{ > + dprint(DBG_DM, ": destroy siw device at %s\n", sdev->netdev->name); > + > + siw_idr_release(sdev); > + kfree(sdev->ofa_dev.iwcm); > + dev_put(sdev->netdev); > + ib_dealloc_device(&sdev->ofa_dev); > +} > + > + > +static int siw_match_iflist(struct net_device *dev) > +{ > + int i; > + > + if (if_cnt == 0) No need to be explicit with "== 0". > + return 1; > + > + if_cnt = min_t(int, SIW_MAX_IF, if_cnt); > + > + for (i = 0; i < if_cnt; i++) > + if (!strcmp(iface_list[i], dev->name)) > + return 1; > + return 0; > +} > + > +static struct siw_dev *siw_dev_from_netdev(struct net_device *dev) > +{ > + if (!list_empty(&siw_devlist)) { > + struct list_head *pos; > + > + list_for_each(pos, &siw_devlist) { > + struct siw_dev *sdev = > + list_entry(pos, struct siw_dev, list); > + if (sdev->netdev == dev) > + return sdev; > + } > + } > + return NULL; > +} > + > +static int siw_tx_qualified(int cpu) > +{ > + int i; > + > + if (cpu_cnt == 0) > + return 1; > + > + for (i = 0; i < cpu_cnt; i++) { > + int new_cpu; > + > + if (kstrtoint(tx_cpu_list[i], 0, &new_cpu)) > + continue; > + if (cpu == new_cpu) > + return 1; > + } > + return 0; > +} > + > +static int siw_create_tx_threads(int max_threads, int check_qualified) > +{ > + int cpu, rv, assigned = 0; > + > + if (max_threads < 0 || max_threads > MAX_CPU) > + return 0; > + > + for_each_online_cpu(cpu) { > + if (siw_tx_qualified(cpu)) { > + qp_tx_thread[cpu] = > + kthread_create(siw_run_sq, > + (unsigned long *)(long)cpu, > + "qp_tx_thread/%d", cpu); You should have 
very good reasons to create kernel threads and especially for each online CPU. > + kthread_bind(qp_tx_thread[cpu], cpu); > + if (IS_ERR(qp_tx_thread)) { > + rv = PTR_ERR(qp_tx_thread); > + qp_tx_thread[cpu] = NULL; > + pr_info("Binding TX thread to CPU %d failed", > + cpu); > + break; > + } > + wake_up_process(qp_tx_thread[cpu]); > + assigned++; > + if (default_tx_cpu < 0) > + default_tx_cpu = cpu; > + if (assigned >= max_threads) > + break; > + } > + } > + return assigned; > +} > + > +static int siw_dev_qualified(struct net_device *netdev) > +{ > + if (!siw_match_iflist(netdev)) { > + dprint(DBG_DM, ": %s (not selected)\n", > + netdev->name); > + return 0; > + } > + /* > + * Additional hardware support can be added here > + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see > + * <linux/if_arp.h> for type identifiers. > + */ > + if (netdev->type == ARPHRD_ETHER || > + netdev->type == ARPHRD_IEEE802 || > + netdev->type == ARPHRD_INFINIBAND || > + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) > + return 1; > + > + return 0; > +} > + > +static void siw_verbs_sq_flush(struct ib_qp *ofa_qp) > +{ > + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); > + > + down_write(&qp->state_lock); > + siw_sq_flush(qp); > + up_write(&qp->state_lock); > +} > + > +static void siw_verbs_rq_flush(struct ib_qp *ofa_qp) > +{ > + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); > + > + down_write(&qp->state_lock); > + siw_rq_flush(qp); > + up_write(&qp->state_lock); > +} > + > +static struct ib_ah *siw_create_ah(struct ib_pd *pd, struct rdma_ah_attr *attr, > + struct ib_udata *udata) > +{ > + return ERR_PTR(-EOPNOTSUPP); > +} > + > +static int siw_destroy_ah(struct ib_ah *ah) > +{ > + return -EOPNOTSUPP; > +} > + ENOSYS for both. 
> + > +static struct siw_dev *siw_device_create(struct net_device *netdev) > +{ > + struct siw_dev *sdev = (struct siw_dev *)ib_alloc_device(sizeof(*sdev)); > + struct ib_device *ofa_dev; > + > + if (!sdev) > + goto out; > + > + ofa_dev = &sdev->ofa_dev; > + > + ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); > + if (!ofa_dev->iwcm) { > + ib_dealloc_device(ofa_dev); > + sdev = NULL; > + goto out; > + } > + > + sdev->netdev = netdev; > + list_add_tail(&sdev->list, &siw_devlist); > + > + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); > + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, > + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); > + > + memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); > + if (netdev->type != ARPHRD_LOOPBACK) > + memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); > + else { > + /* > + * The loopback device does not have a HW address, > + * but connection mangagement lib expects gid != 0 > + */ > + size_t gidlen = min_t(size_t, strlen(ofa_dev->name), 6); > + > + memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); > + } > + ofa_dev->owner = THIS_MODULE; > + > + ofa_dev->uverbs_cmd_mask = > + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | > + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | > + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | > + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | > + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | > + (1ull << IB_USER_VERBS_CMD_REG_MR) | > + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | > + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | > + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | > + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | > + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | > + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | > + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | > + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | > + (1ull << IB_USER_VERBS_CMD_POST_SEND) | > + (1ull << IB_USER_VERBS_CMD_POST_RECV) | > + (1ull << 
IB_USER_VERBS_CMD_CREATE_SRQ) | > + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_REG_MR) | > + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | > + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); > + > + ofa_dev->node_type = RDMA_NODE_RNIC; > + memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, > + sizeof(SIW_NODE_DESC_COMMON)); > + > + /* > + * Current model (one-to-one device association): > + * One Softiwarp device per net_device or, equivalently, > + * per physical port. > + */ > + ofa_dev->phys_port_cnt = 1; > + > + ofa_dev->num_comp_vectors = num_possible_cpus(); > + ofa_dev->dev.parent = &siw_generic_dma_device; > + ofa_dev->query_device = siw_query_device; > + ofa_dev->query_port = siw_query_port; > + ofa_dev->get_port_immutable = siw_get_port_immutable; > + ofa_dev->query_qp = siw_query_qp; > + ofa_dev->modify_port = siw_modify_port; > + ofa_dev->query_pkey = siw_query_pkey; > + ofa_dev->query_gid = siw_query_gid; > + ofa_dev->alloc_ucontext = siw_alloc_ucontext; > + ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; > + ofa_dev->mmap = siw_mmap; > + ofa_dev->alloc_pd = siw_alloc_pd; > + ofa_dev->dealloc_pd = siw_dealloc_pd; > + ofa_dev->create_ah = siw_create_ah; > + ofa_dev->destroy_ah = siw_destroy_ah; > + ofa_dev->create_qp = siw_create_qp; > + ofa_dev->modify_qp = siw_verbs_modify_qp; > + ofa_dev->destroy_qp = siw_destroy_qp; > + ofa_dev->create_cq = siw_create_cq; > + ofa_dev->destroy_cq = siw_destroy_cq; > + ofa_dev->resize_cq = NULL; No need to set NULL. 
> + ofa_dev->poll_cq = siw_poll_cq; > + ofa_dev->get_dma_mr = siw_get_dma_mr; > + ofa_dev->reg_user_mr = siw_reg_user_mr; > + ofa_dev->dereg_mr = siw_dereg_mr; > + ofa_dev->alloc_mr = siw_alloc_mr; > + ofa_dev->map_mr_sg = siw_map_mr_sg; > + ofa_dev->dealloc_mw = NULL; > + > + ofa_dev->create_srq = siw_create_srq; > + ofa_dev->modify_srq = siw_modify_srq; > + ofa_dev->query_srq = siw_query_srq; > + ofa_dev->destroy_srq = siw_destroy_srq; > + ofa_dev->post_srq_recv = siw_post_srq_recv; > + > + ofa_dev->attach_mcast = NULL; > + ofa_dev->detach_mcast = NULL; > + ofa_dev->process_mad = siw_no_mad; > + > + ofa_dev->req_notify_cq = siw_req_notify_cq; > + ofa_dev->post_send = siw_post_send; > + ofa_dev->post_recv = siw_post_receive; > + > + ofa_dev->drain_sq = siw_verbs_sq_flush; > + ofa_dev->drain_rq = siw_verbs_rq_flush; > + > + ofa_dev->dev.dma_ops = &dma_virt_ops; > + > + ofa_dev->iwcm->connect = siw_connect; > + ofa_dev->iwcm->accept = siw_accept; > + ofa_dev->iwcm->reject = siw_reject; > + ofa_dev->iwcm->create_listen = siw_create_listen; > + ofa_dev->iwcm->destroy_listen = siw_destroy_listen; > + ofa_dev->iwcm->add_ref = siw_qp_get_ref; > + ofa_dev->iwcm->rem_ref = siw_qp_put_ref; > + ofa_dev->iwcm->get_qp = siw_get_ofaqp; > + > + sdev->attrs.version = VERSION_ID_SOFTIWARP; > + sdev->attrs.vendor_id = SIW_VENDOR_ID; > + sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; > + sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; > + sdev->attrs.max_qp = SIW_MAX_QP; > + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; > + sdev->attrs.max_ord = SIW_MAX_ORD_QP; > + sdev->attrs.max_ird = SIW_MAX_IRD_QP; > + sdev->attrs.cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; > + sdev->attrs.max_sge = SIW_MAX_SGE; > + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; > + sdev->attrs.max_cq = SIW_MAX_CQ; > + sdev->attrs.max_cqe = SIW_MAX_CQE; > + sdev->attrs.max_mr = SIW_MAX_MR; > + sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); > + sdev->attrs.max_pd = SIW_MAX_PD; > + sdev->attrs.max_mw = SIW_MAX_MW; > + 
sdev->attrs.max_fmr = SIW_MAX_FMR; > + sdev->attrs.max_srq = SIW_MAX_SRQ; > + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; > + sdev->attrs.max_srq_sge = SIW_MAX_SGE; > + > + siw_idr_init(sdev); > + INIT_LIST_HEAD(&sdev->cep_list); > + INIT_LIST_HEAD(&sdev->qp_list); > + > + atomic_set(&sdev->num_ctx, 0); > + atomic_set(&sdev->num_srq, 0); > + atomic_set(&sdev->num_qp, 0); > + atomic_set(&sdev->num_cq, 0); > + atomic_set(&sdev->num_mem, 0); > + atomic_set(&sdev->num_pd, 0); > + atomic_set(&sdev->num_cep, 0); > + > + sdev->is_registered = 0; > +out: > + if (sdev) > + dev_hold(netdev); > + > + return sdev; > +} > + > + > + > +static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > + void *arg) > +{ > + struct net_device *netdev = netdev_notifier_info_to_dev(arg); > + struct in_device *in_dev; > + struct siw_dev *sdev; > + > + dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); > + > + if (dev_net(netdev) != &init_net) > + goto done; > + > + sdev = siw_dev_from_netdev(netdev); > + > + switch (event) { > + > + case NETDEV_UP: > + if (!sdev) > + break; > + > + if (sdev->is_registered) { > + sdev->state = IB_PORT_ACTIVE; > + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); > + break; > + } > + > + in_dev = in_dev_get(netdev); > + if (!in_dev) { > + dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); > + sdev->state = IB_PORT_INIT; > + break; > + } > + > + if (in_dev->ifa_list) { > + sdev->state = IB_PORT_ACTIVE; > + siw_device_register(sdev); > + } else { > + dprint(DBG_DM, ": %s: no ifa\n", netdev->name); > + sdev->state = IB_PORT_INIT; > + } > + in_dev_put(in_dev); > + > + break; > + > + case NETDEV_DOWN: > + if (sdev && sdev->is_registered) { > + sdev->state = IB_PORT_DOWN; > + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); > + break; > + } > + break; > + > + case NETDEV_REGISTER: > + if (!sdev) { > + if (!siw_dev_qualified(netdev)) > + break; > + > + sdev = siw_device_create(netdev); > + if (sdev) { > + sdev->state = IB_PORT_INIT; > + 
dprint(DBG_DM, ": new siw device for %s\n", > + netdev->name); > + } > + } > + break; > + > + case NETDEV_UNREGISTER: > + if (sdev) { > + if (sdev->is_registered) > + siw_device_deregister(sdev); > + list_del(&sdev->list); > + siw_device_destroy(sdev); > + } > + break; > + > + case NETDEV_CHANGEADDR: > + if (sdev->is_registered) > + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); > + > + break; > + /* > + * Todo: Below netdev events are currently not handled. > + */ > + case NETDEV_CHANGEMTU: > + case NETDEV_GOING_DOWN: > + case NETDEV_CHANGE: > + > + break; > + > + default: > + break; > + } > +done: > + return NOTIFY_OK; > +} > + > +static struct notifier_block siw_netdev_nb = { > + .notifier_call = siw_netdev_event, > +}; > + > +/* > + * siw_init_module - Initialize Softiwarp module and register with netdev > + * subsystem to create Softiwarp devices per net_device > + */ > +static __init int siw_init_module(void) > +{ > + int rv; > + int nr_cpu; > + > + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { > + pr_info("siw: sendpage threshold too small: %u\n", > + (int)SENDPAGE_THRESH); > + rv = EINVAL; > + goto out; > + } > + /* > + * The xprtrdma module needs at least some rudimentary bus to set > + * some devices path MTU. > + */ > + rv = bus_register(&siw_bus); bus register for the driver? no way. > + if (rv) > + goto out_nobus; > + > + siw_generic_dma_device.bus = &siw_bus; > + > + rv = device_register(&siw_generic_dma_device); > + if (rv) > + goto out; > + > + rv = siw_cm_init(); > + if (rv) > + goto out_unregister; > + > + if (DPRINT_MASK) > + siw_debug_init(); > + > + /* > + * Allocate CRC SHASH object. 
Fail loading siw only, if CRC is > + * required by kernel module > + */ > + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); > + if (IS_ERR(siw_crypto_shash)) { > + pr_info("siw: Loading CRC32c failed: %ld\n", > + PTR_ERR(siw_crypto_shash)); > + siw_crypto_shash = NULL; > + if (mpa_crc_required == true) > + goto out_unregister; > + } > + rv = register_netdevice_notifier(&siw_netdev_nb); > + if (rv) { > + siw_debugfs_delete(); > + goto out_unregister; > + } > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) > + qp_tx_thread[nr_cpu] = NULL; > + > + if (siw_create_tx_threads(MAX_CPU, 1) == 0) { > + pr_info("Try starting default TX thread\n"); > + if (siw_create_tx_threads(1, 0) == 0) { > + pr_info("Could not start any TX thread\n"); > + goto out_unregister; > + } > + } > + pr_info("SoftiWARP attached\n"); > + return 0; > + > +out_unregister: > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { > + if (qp_tx_thread[nr_cpu]) { > + siw_stop_tx_thread(nr_cpu); > + qp_tx_thread[nr_cpu] = NULL; > + } > + } > + device_unregister(&siw_generic_dma_device); > + > + if (siw_crypto_shash) > + crypto_free_shash(siw_crypto_shash); > +out: > + bus_unregister(&siw_bus); > +out_nobus: > + pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); > + siw_cm_exit(); > + > + return rv; > +} > + > + > +static void __exit siw_exit_module(void) > +{ > + int nr_cpu; > + > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { > + if (qp_tx_thread[nr_cpu]) { > + siw_stop_tx_thread(nr_cpu); > + qp_tx_thread[nr_cpu] = NULL; > + } > + } > + unregister_netdevice_notifier(&siw_netdev_nb); > + > + siw_cm_exit(); > + > + while (!list_empty(&siw_devlist)) { > + struct siw_dev *sdev = > + list_entry(siw_devlist.next, struct siw_dev, list); > + list_del(&sdev->list); > + if (sdev->is_registered) > + siw_device_deregister(sdev); > + > + siw_device_destroy(sdev); > + } > + if (siw_crypto_shash) > + crypto_free_shash(siw_crypto_shash); > + > + siw_debugfs_delete(); > + > + device_unregister(&siw_generic_dma_device); > + > + bus_unregister(&siw_bus); > + > + pr_info("SoftiWARP detached\n"); > +} > + > +module_init(siw_init_module); > +module_exit(siw_exit_module); > -- > 2.13.6 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/6/2017 8:28 AM, Bernard Metzler wrote: > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > +static bool loopback_enabled = 1; > +module_param(loopback_enabled, bool, 0644); > +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); > + > +LIST_HEAD(siw_devlist); Pretty much everyone here hates module parameters and you will get pushback on principle, but this is an example of why they are bad. The siw_devlist shows you expect to have multiple devices, which is fine and of course makes sense. However, the module parameter is driver-wide. This means you cannot have one device in loopback mode while the other isn't. > + > + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); > + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, > + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); I'd encourage you to use the likes of strncpy and friends. -Denny -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
-----Leon Romanovsky <leon@kernel.org> wrote: ----- >To: Bernard Metzler <bmt@zurich.ibm.com> >From: Leon Romanovsky <leon@kernel.org> >Date: 10/08/2017 03:03PM >Cc: linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Fri, Oct 06, 2017 at 08:28:43AM -0400, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >> --- >> drivers/infiniband/sw/siw/siw_main.c | 752 >+++++++++++++++++++++++++++++++++++ >> 1 file changed, 752 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_main.c >> >> diff --git a/drivers/infiniband/sw/siw/siw_main.c >b/drivers/infiniband/sw/siw/siw_main.c >> new file mode 100644 >> index 000000000000..5a054c6becaa >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_main.c >> @@ -0,0 +1,752 @@ >> +/* >> + * Software iWARP device driver for Linux >> + * >> + * Authors: Bernard Metzler <bmt@zurich.ibm.com> >> + * >> + * Copyright (c) 2008-2017, IBM Corporation >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the >GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the >following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >copyright notice, >> + * this list of conditions and the following disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >copyright >> + * notice, this list of conditions and the following >disclaimer in the >> + * documentation and/or other materials provided with the >distribution. 
>> + * >> + * - Neither the name of IBM nor the names of its contributors >may be >> + * used to endorse or promote products derived from this >software without >> + * specific prior written permission. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >THE >> + * SOFTWARE. >> + */ >> + >> +#include <linux/init.h> >> +#include <linux/errno.h> >> +#include <linux/netdevice.h> >> +#include <linux/inetdevice.h> >> +#include <net/net_namespace.h> >> +#include <linux/rtnetlink.h> >> +#include <linux/if_arp.h> >> +#include <linux/list.h> >> +#include <linux/kernel.h> >> +#include <linux/dma-mapping.h> >> + >> +#include <rdma/ib_verbs.h> >> +#include <rdma/ib_smi.h> >> +#include <rdma/ib_user_verbs.h> >> + >> +#include "siw.h" >> +#include "siw_obj.h" >> +#include "siw_cm.h" >> +#include "siw_verbs.h" >> +#include <linux/kthread.h> >> + >> + >> +MODULE_AUTHOR("Bernard Metzler"); >> +MODULE_DESCRIPTION("Software iWARP Driver"); >> +MODULE_LICENSE("Dual BSD/GPL"); >> +MODULE_VERSION("0.2"); > >No module versions please, it is useless. Okay. > >> + >> +#define SIW_MAX_IF 12 >> +static int if_cnt; >> +static char *iface_list[SIW_MAX_IF] = {[0 ... 
(SIW_MAX_IF-1)] = >'\0'}; >> +module_param_array(iface_list, charp, &if_cnt, 0444); >> +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if >present"); >> + >> +static bool loopback_enabled = 1; >> +module_param(loopback_enabled, bool, 0644); >> +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); >> + >> +LIST_HEAD(siw_devlist); >> + >> +static int cpu_cnt; >> +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = '\0'}; >> +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); >> +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be >bound to"); > >No module parameters please. OK. Would you have a pointer which sheds some light on that rule? Thank you! > >> + >> +int default_tx_cpu = -1; >> +struct task_struct *qp_tx_thread[MAX_CPU]; >> +struct crypto_shash *siw_crypto_shash; >> + >> +static ssize_t show_sw_version(struct device *dev, >> + struct device_attribute *attr, char *buf) >> +{ >> + struct siw_dev *sdev = container_of(dev, struct siw_dev, >ofa_dev.dev); > >Please remove "ofa_*" from this code, upstream has nothing to do with >OFA. OK, sure. That came from the fact it felt weird to write an iWarp driver and use ib_*. Shall we stick to history and name those things ib even if we now have 4 transports, or can we come up with something more generic? ofa obviously is not a good idea. > >> + >> + return sprintf(buf, "%x\n", sdev->attrs.version); >> +} >> + >> +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL); > >Why do you need? Does "rdma dev" work for you? siw over time went through several versions with different user interfaces (I added memory mapping to SQ/RQ/CQ late, changed the WQE structure, etc.). I wanted to make sure the kernel module talks to the right user library.
> >> + >> +static struct device_attribute *siw_dev_attributes[] = { >> + &dev_attr_sw_version >> +}; >> + >> +static void siw_device_release(struct device *dev) >> +{ >> + pr_info("%s device released\n", dev_name(dev)); >> +} >> + >> +static struct device siw_generic_dma_device = { >> + .dma_ops = &siw_dma_generic_ops, >> + .init_name = "software-rdma-v2", >> + .release = siw_device_release >> +}; >> + >> +static struct bus_type siw_bus = { >> + .name = "siw", >> +}; >> + >> +static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int >mask, >> + struct ib_port_modify *props) >> +{ >> + return -EOPNOTSUPP; >> +} > >The proper error code is ENOSYS and if the function is not >implemented, >it shouldn't be set. I think checkpatch didn't like ENOSYS. I will see and fix accordingly. > >> + >> + >> +static void siw_device_register(struct siw_dev *sdev) >> +{ >> + struct ib_device *ofa_dev = &sdev->ofa_dev; > >It is Linux kernel code and not OFED. Got it. > >> + int rv, i; >> + static int dev_id = 1; >> + >> + rv = ib_register_device(ofa_dev, NULL); >> + if (rv) { >> + dprint(DBG_DM|DBG_ON, " %s: ib register error: rv=%d\n", >> + ofa_dev->name, rv); >> + return; >> + } >> + >> + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { >> + rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); >> + if (rv) { >> + dprint(DBG_DM|DBG_ON, " %s: create file error: rv=%d\n", >> + ofa_dev->name, rv); >> + ib_unregister_device(ofa_dev); >> + return; >> + } >> + } >> + siw_debugfs_add_device(sdev); >> + >> + sdev->attrs.vendor_part_id = dev_id++; >> + >> + dprint(DBG_DM, ": '%s' at '%s', >HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", >> + ofa_dev->name, sdev->netdev->name, >> + *(u8 *)sdev->netdev->dev_addr, >> + *((u8 *)sdev->netdev->dev_addr + 1), >> + *((u8 *)sdev->netdev->dev_addr + 2), >> + *((u8 *)sdev->netdev->dev_addr + 3), >> + *((u8 *)sdev->netdev->dev_addr + 4), >> + *((u8 *)sdev->netdev->dev_addr + 5)); >> + >> + sdev->is_registered = 1; >> +} >> + >> +static
void siw_device_deregister(struct siw_dev *sdev) >> +{ >> + int i; >> + >> + siw_debugfs_del_device(sdev); >> + >> + if (sdev->is_registered) { >> + >> + dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, >> + sdev->netdev->name); >> + >> + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) >> + device_remove_file(&sdev->ofa_dev.dev, >> + siw_dev_attributes[i]); >> + >> + ib_unregister_device(&sdev->ofa_dev); >> + } >> + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || >> + atomic_read(&sdev->num_mem) || atomic_read(&sdev->num_cep) || >> + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || >> + atomic_read(&sdev->num_pd)) { >> + pr_warn("SIW at %s: orphaned resources!\n", sdev->netdev->name); >> + pr_warn("CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD %d\n", >> + atomic_read(&sdev->num_ctx), >> + atomic_read(&sdev->num_srq), >> + atomic_read(&sdev->num_qp), >> + atomic_read(&sdev->num_cq), >> + atomic_read(&sdev->num_mem), >> + atomic_read(&sdev->num_cep), >> + atomic_read(&sdev->num_pd)); >> + } >> + i = 0; >> + >> + while (!list_empty(&sdev->cep_list)) { >> + struct siw_cep *cep = list_entry(sdev->cep_list.next, >> + struct siw_cep, devq); >> + list_del(&cep->devq); >> + dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", >> + cep, cep->state); >> + kfree(cep); >> + i++; >> + } >> + if (i) >> + pr_warn("%s: free'd %d CEPs\n", __func__, i); >> + >> + sdev->is_registered = 0; >> +} >> + >> +static void siw_device_destroy(struct siw_dev *sdev) >> +{ >> + dprint(DBG_DM, ": destroy siw device at %s\n", >sdev->netdev->name); >> + >> + siw_idr_release(sdev); >> + kfree(sdev->ofa_dev.iwcm); >> + dev_put(sdev->netdev); >> + ib_dealloc_device(&sdev->ofa_dev); >> +} >> + >> + >> +static int siw_match_iflist(struct net_device *dev) >> +{ >> + int i; >> + >> + if (if_cnt == 0) > >No need to be explicit with "== 0". 
OK > >> + return 1; >> + >> + if_cnt = min_t(int, SIW_MAX_IF, if_cnt); >> + >> + for (i = 0; i < if_cnt; i++) >> + if (!strcmp(iface_list[i], dev->name)) >> + return 1; >> + return 0; >> +} >> + >> +static struct siw_dev *siw_dev_from_netdev(struct net_device *dev) >> +{ >> + if (!list_empty(&siw_devlist)) { >> + struct list_head *pos; >> + >> + list_for_each(pos, &siw_devlist) { >> + struct siw_dev *sdev = >> + list_entry(pos, struct siw_dev, list); >> + if (sdev->netdev == dev) >> + return sdev; >> + } >> + } >> + return NULL; >> +} >> + >> +static int siw_tx_qualified(int cpu) >> +{ >> + int i; >> + >> + if (cpu_cnt == 0) >> + return 1; >> + >> + for (i = 0; i < cpu_cnt; i++) { >> + int new_cpu; >> + >> + if (kstrtoint(tx_cpu_list[i], 0, &new_cpu)) >> + continue; >> + if (cpu == new_cpu) >> + return 1; >> + } >> + return 0; >> +} >> + >> +static int siw_create_tx_threads(int max_threads, int >check_qualified) >> +{ >> + int cpu, rv, assigned = 0; >> + >> + if (max_threads < 0 || max_threads > MAX_CPU) >> + return 0; >> + >> + for_each_online_cpu(cpu) { >> + if (siw_tx_qualified(cpu)) { >> + qp_tx_thread[cpu] = >> + kthread_create(siw_run_sq, >> + (unsigned long *)(long)cpu, >> + "qp_tx_thread/%d", cpu); > >You should have very good reasons to create kernel threads and >especially for each online CPU. > Yes. As I wrote in my cover letter, I am not sure the current TX code is optimal. I started with work queues and found those to introduce lots of delay. I have to rethink. 
>> + kthread_bind(qp_tx_thread[cpu], cpu); >> + if (IS_ERR(qp_tx_thread)) { >> + rv = PTR_ERR(qp_tx_thread); >> + qp_tx_thread[cpu] = NULL; >> + pr_info("Binding TX thread to CPU %d failed", >> + cpu); >> + break; >> + } >> + wake_up_process(qp_tx_thread[cpu]); >> + assigned++; >> + if (default_tx_cpu < 0) >> + default_tx_cpu = cpu; >> + if (assigned >= max_threads) >> + break; >> + } >> + } >> + return assigned; >> +} >> + >> +static int siw_dev_qualified(struct net_device *netdev) >> +{ >> + if (!siw_match_iflist(netdev)) { >> + dprint(DBG_DM, ": %s (not selected)\n", >> + netdev->name); >> + return 0; >> + } >> + /* >> + * Additional hardware support can be added here >> + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see >> + * <linux/if_arp.h> for type identifiers. >> + */ >> + if (netdev->type == ARPHRD_ETHER || >> + netdev->type == ARPHRD_IEEE802 || >> + netdev->type == ARPHRD_INFINIBAND || >> + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) >> + return 1; >> + >> + return 0; >> +} >> + >> +static void siw_verbs_sq_flush(struct ib_qp *ofa_qp) >> +{ >> + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); >> + >> + down_write(&qp->state_lock); >> + siw_sq_flush(qp); >> + up_write(&qp->state_lock); >> +} >> + >> +static void siw_verbs_rq_flush(struct ib_qp *ofa_qp) >> +{ >> + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); >> + >> + down_write(&qp->state_lock); >> + siw_rq_flush(qp); >> + up_write(&qp->state_lock); >> +} >> + >> +static struct ib_ah *siw_create_ah(struct ib_pd *pd, struct >rdma_ah_attr *attr, >> + struct ib_udata *udata) >> +{ >> + return ERR_PTR(-EOPNOTSUPP); >> +} >> + >> +static int siw_destroy_ah(struct ib_ah *ah) >> +{ >> + return -EOPNOTSUPP; >> +} >> + > >ENOSYS for both. 
> >> + >> +static struct siw_dev *siw_device_create(struct net_device >*netdev) >> +{ >> + struct siw_dev *sdev = (struct siw_dev >*)ib_alloc_device(sizeof(*sdev)); >> + struct ib_device *ofa_dev; >> + >> + if (!sdev) >> + goto out; >> + >> + ofa_dev = &sdev->ofa_dev; >> + >> + ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); >> + if (!ofa_dev->iwcm) { >> + ib_dealloc_device(ofa_dev); >> + sdev = NULL; >> + goto out; >> + } >> + >> + sdev->netdev = netdev; >> + list_add_tail(&sdev->list, &siw_devlist); >> + >> + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); >> + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, >> + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); >> + >> + memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); >> + if (netdev->type != ARPHRD_LOOPBACK) >> + memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); >> + else { >> + /* >> + * The loopback device does not have a HW address, >> + * but connection mangagement lib expects gid != 0 >> + */ >> + size_t gidlen = min_t(size_t, strlen(ofa_dev->name), 6); >> + >> + memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); >> + } >> + ofa_dev->owner = THIS_MODULE; >> + >> + ofa_dev->uverbs_cmd_mask = >> + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | >> + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | >> + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | >> + (1ull << IB_USER_VERBS_CMD_REG_MR) | >> + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | >> + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | >> + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | >> + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | >> + (1ull << IB_USER_VERBS_CMD_POST_SEND) | >> + (1ull << 
IB_USER_VERBS_CMD_POST_RECV) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_REG_MR) | >> + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | >> + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); >> + >> + ofa_dev->node_type = RDMA_NODE_RNIC; >> + memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, >> + sizeof(SIW_NODE_DESC_COMMON)); >> + >> + /* >> + * Current model (one-to-one device association): >> + * One Softiwarp device per net_device or, equivalently, >> + * per physical port. >> + */ >> + ofa_dev->phys_port_cnt = 1; >> + >> + ofa_dev->num_comp_vectors = num_possible_cpus(); >> + ofa_dev->dev.parent = &siw_generic_dma_device; >> + ofa_dev->query_device = siw_query_device; >> + ofa_dev->query_port = siw_query_port; >> + ofa_dev->get_port_immutable = siw_get_port_immutable; >> + ofa_dev->query_qp = siw_query_qp; >> + ofa_dev->modify_port = siw_modify_port; >> + ofa_dev->query_pkey = siw_query_pkey; >> + ofa_dev->query_gid = siw_query_gid; >> + ofa_dev->alloc_ucontext = siw_alloc_ucontext; >> + ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; >> + ofa_dev->mmap = siw_mmap; >> + ofa_dev->alloc_pd = siw_alloc_pd; >> + ofa_dev->dealloc_pd = siw_dealloc_pd; >> + ofa_dev->create_ah = siw_create_ah; >> + ofa_dev->destroy_ah = siw_destroy_ah; >> + ofa_dev->create_qp = siw_create_qp; >> + ofa_dev->modify_qp = siw_verbs_modify_qp; >> + ofa_dev->destroy_qp = siw_destroy_qp; >> + ofa_dev->create_cq = siw_create_cq; >> + ofa_dev->destroy_cq = siw_destroy_cq; >> + ofa_dev->resize_cq = NULL; > >No need to set NULL. Yes, thanks. 
> >> + ofa_dev->poll_cq = siw_poll_cq; >> + ofa_dev->get_dma_mr = siw_get_dma_mr; >> + ofa_dev->reg_user_mr = siw_reg_user_mr; >> + ofa_dev->dereg_mr = siw_dereg_mr; >> + ofa_dev->alloc_mr = siw_alloc_mr; >> + ofa_dev->map_mr_sg = siw_map_mr_sg; >> + ofa_dev->dealloc_mw = NULL; >> + >> + ofa_dev->create_srq = siw_create_srq; >> + ofa_dev->modify_srq = siw_modify_srq; >> + ofa_dev->query_srq = siw_query_srq; >> + ofa_dev->destroy_srq = siw_destroy_srq; >> + ofa_dev->post_srq_recv = siw_post_srq_recv; >> + >> + ofa_dev->attach_mcast = NULL; >> + ofa_dev->detach_mcast = NULL; >> + ofa_dev->process_mad = siw_no_mad; >> + >> + ofa_dev->req_notify_cq = siw_req_notify_cq; >> + ofa_dev->post_send = siw_post_send; >> + ofa_dev->post_recv = siw_post_receive; >> + >> + ofa_dev->drain_sq = siw_verbs_sq_flush; >> + ofa_dev->drain_rq = siw_verbs_rq_flush; >> + >> + ofa_dev->dev.dma_ops = &dma_virt_ops; >> + >> + ofa_dev->iwcm->connect = siw_connect; >> + ofa_dev->iwcm->accept = siw_accept; >> + ofa_dev->iwcm->reject = siw_reject; >> + ofa_dev->iwcm->create_listen = siw_create_listen; >> + ofa_dev->iwcm->destroy_listen = siw_destroy_listen; >> + ofa_dev->iwcm->add_ref = siw_qp_get_ref; >> + ofa_dev->iwcm->rem_ref = siw_qp_put_ref; >> + ofa_dev->iwcm->get_qp = siw_get_ofaqp; >> + >> + sdev->attrs.version = VERSION_ID_SOFTIWARP; >> + sdev->attrs.vendor_id = SIW_VENDOR_ID; >> + sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; >> + sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; >> + sdev->attrs.max_qp = SIW_MAX_QP; >> + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; >> + sdev->attrs.max_ord = SIW_MAX_ORD_QP; >> + sdev->attrs.max_ird = SIW_MAX_IRD_QP; >> + sdev->attrs.cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; >> + sdev->attrs.max_sge = SIW_MAX_SGE; >> + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; >> + sdev->attrs.max_cq = SIW_MAX_CQ; >> + sdev->attrs.max_cqe = SIW_MAX_CQE; >> + sdev->attrs.max_mr = SIW_MAX_MR; >> + sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); >> + sdev->attrs.max_pd = 
SIW_MAX_PD; >> + sdev->attrs.max_mw = SIW_MAX_MW; >> + sdev->attrs.max_fmr = SIW_MAX_FMR; >> + sdev->attrs.max_srq = SIW_MAX_SRQ; >> + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; >> + sdev->attrs.max_srq_sge = SIW_MAX_SGE; >> + >> + siw_idr_init(sdev); >> + INIT_LIST_HEAD(&sdev->cep_list); >> + INIT_LIST_HEAD(&sdev->qp_list); >> + >> + atomic_set(&sdev->num_ctx, 0); >> + atomic_set(&sdev->num_srq, 0); >> + atomic_set(&sdev->num_qp, 0); >> + atomic_set(&sdev->num_cq, 0); >> + atomic_set(&sdev->num_mem, 0); >> + atomic_set(&sdev->num_pd, 0); >> + atomic_set(&sdev->num_cep, 0); >> + >> + sdev->is_registered = 0; >> +out: >> + if (sdev) >> + dev_hold(netdev); >> + >> + return sdev; >> +} >> + >> + >> + >> +static int siw_netdev_event(struct notifier_block *nb, unsigned >long event, >> + void *arg) >> +{ >> + struct net_device *netdev = netdev_notifier_info_to_dev(arg); >> + struct in_device *in_dev; >> + struct siw_dev *sdev; >> + >> + dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); >> + >> + if (dev_net(netdev) != &init_net) >> + goto done; >> + >> + sdev = siw_dev_from_netdev(netdev); >> + >> + switch (event) { >> + >> + case NETDEV_UP: >> + if (!sdev) >> + break; >> + >> + if (sdev->is_registered) { >> + sdev->state = IB_PORT_ACTIVE; >> + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); >> + break; >> + } >> + >> + in_dev = in_dev_get(netdev); >> + if (!in_dev) { >> + dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); >> + sdev->state = IB_PORT_INIT; >> + break; >> + } >> + >> + if (in_dev->ifa_list) { >> + sdev->state = IB_PORT_ACTIVE; >> + siw_device_register(sdev); >> + } else { >> + dprint(DBG_DM, ": %s: no ifa\n", netdev->name); >> + sdev->state = IB_PORT_INIT; >> + } >> + in_dev_put(in_dev); >> + >> + break; >> + >> + case NETDEV_DOWN: >> + if (sdev && sdev->is_registered) { >> + sdev->state = IB_PORT_DOWN; >> + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); >> + break; >> + } >> + break; >> + >> + case NETDEV_REGISTER: >> + if (!sdev) { >> + if 
(!siw_dev_qualified(netdev)) >> + break; >> + >> + sdev = siw_device_create(netdev); >> + if (sdev) { >> + sdev->state = IB_PORT_INIT; >> + dprint(DBG_DM, ": new siw device for %s\n", >> + netdev->name); >> + } >> + } >> + break; >> + >> + case NETDEV_UNREGISTER: >> + if (sdev) { >> + if (sdev->is_registered) >> + siw_device_deregister(sdev); >> + list_del(&sdev->list); >> + siw_device_destroy(sdev); >> + } >> + break; >> + >> + case NETDEV_CHANGEADDR: >> + if (sdev->is_registered) >> + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); >> + >> + break; >> + /* >> + * Todo: Below netdev events are currently not handled. >> + */ >> + case NETDEV_CHANGEMTU: >> + case NETDEV_GOING_DOWN: >> + case NETDEV_CHANGE: >> + >> + break; >> + >> + default: >> + break; >> + } >> +done: >> + return NOTIFY_OK; >> +} >> + >> +static struct notifier_block siw_netdev_nb = { >> + .notifier_call = siw_netdev_event, >> +}; >> + >> +/* >> + * siw_init_module - Initialize Softiwarp module and register with >netdev >> + * subsystem to create Softiwarp devices per >net_device >> + */ >> +static __init int siw_init_module(void) >> +{ >> + int rv; >> + int nr_cpu; >> + >> + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { >> + pr_info("siw: sendpage threshold too small: %u\n", >> + (int)SENDPAGE_THRESH); >> + rv = EINVAL; >> + goto out; >> + } >> + /* >> + * The xprtrdma module needs at least some rudimentary bus to set >> + * some devices path MTU. >> + */ >> + rv = bus_register(&siw_bus); > >bus register for the driver? no way. OK. I will have to rework that part. > >> + if (rv) >> + goto out_nobus; >> + >> + siw_generic_dma_device.bus = &siw_bus; >> + >> + rv = device_register(&siw_generic_dma_device); >> + if (rv) >> + goto out; >> + >> + rv = siw_cm_init(); >> + if (rv) >> + goto out_unregister; >> + >> + if (DPRINT_MASK) >> + siw_debug_init(); >> + >> + /* >> + * Allocate CRC SHASH object. 
Fail loading siw only, if CRC is >> + * required by kernel module >> + */ >> + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); >> + if (IS_ERR(siw_crypto_shash)) { >> + pr_info("siw: Loading CRC32c failed: %ld\n", >> + PTR_ERR(siw_crypto_shash)); >> + siw_crypto_shash = NULL; >> + if (mpa_crc_required == true) >> + goto out_unregister; >> + } >> + rv = register_netdevice_notifier(&siw_netdev_nb); >> + if (rv) { >> + siw_debugfs_delete(); >> + goto out_unregister; >> + } >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) >> + qp_tx_thread[nr_cpu] = NULL; >> + >> + if (siw_create_tx_threads(MAX_CPU, 1) == 0) { >> + pr_info("Try starting default TX thread\n"); >> + if (siw_create_tx_threads(1, 0) == 0) { >> + pr_info("Could not start any TX thread\n"); >> + goto out_unregister; >> + } >> + } >> + pr_info("SoftiWARP attached\n"); >> + return 0; >> + >> +out_unregister: >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { >> + if (qp_tx_thread[nr_cpu]) { >> + siw_stop_tx_thread(nr_cpu); >> + qp_tx_thread[nr_cpu] = NULL; >> + } >> + } >> + device_unregister(&siw_generic_dma_device); >> + >> + if (siw_crypto_shash) >> + crypto_free_shash(siw_crypto_shash); >> +out: >> + bus_unregister(&siw_bus); >> +out_nobus: >> + pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); >> + siw_cm_exit(); >> + >> + return rv; >> +} >> + >> + >> +static void __exit siw_exit_module(void) >> +{ >> + int nr_cpu; >> + >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { >> + if (qp_tx_thread[nr_cpu]) { >> + siw_stop_tx_thread(nr_cpu); >> + qp_tx_thread[nr_cpu] = NULL; >> + } >> + } >> + unregister_netdevice_notifier(&siw_netdev_nb); >> + >> + siw_cm_exit(); >> + >> + while (!list_empty(&siw_devlist)) { >> + struct siw_dev *sdev = >> + list_entry(siw_devlist.next, struct siw_dev, list); >> + list_del(&sdev->list); >> + if (sdev->is_registered) >> + siw_device_deregister(sdev); >> + >> + siw_device_destroy(sdev); >> + } >> + if (siw_crypto_shash) >> + crypto_free_shash(siw_crypto_shash); >> + >> + siw_debugfs_delete(); >> + >> + device_unregister(&siw_generic_dma_device); >> + >> + bus_unregister(&siw_bus); >> + >> + pr_info("SoftiWARP detached\n"); >> +} >> + >> +module_init(siw_init_module); >> +module_exit(siw_exit_module); >> -- >> 2.13.6 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe >linux-rdma" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > [attachment "signature.asc" removed by Bernard Metzler/Zurich/IBM] -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Oct 14, 2017 at 01:28:43AM +0000, Bernard Metzler wrote: > -----Leon Romanovsky <leon@kernel.org> wrote: ----- > > >> + > >> +#define SIW_MAX_IF 12 > >> +static int if_cnt; > >> +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = > >'\0'}; > >> +module_param_array(iface_list, charp, &if_cnt, 0444); > >> +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if > >present"); > >> + > >> +static bool loopback_enabled = 1; > >> +module_param(loopback_enabled, bool, 0644); > >> +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); > >> + > >> +LIST_HEAD(siw_devlist); > >> + > >> +static int cpu_cnt; > >> +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = '\0'}; > >> +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); > >> +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be > >bound to"); > > > >No module parameters please. > OK. Would you have a pointer which sheds some light on that > rule? Thank you! > It is not a rule, but common knowledge, exactly like BUG_ON, which is not prohibited but makes no sense in low-level driver code. Module parameters sometimes make sense, for example at the subsystem level, where they can apply to all the devices underneath. But most of the time, they indicate complete disregard for users in favor of an easy life for the developer. For people like me, who don't run modules at all, a change in module parameters requires a rebuild of the initramfs and, in more extreme cases, a rebuild of SELinux labels. > > > >> + > >> +int default_tx_cpu = -1; > >> +struct task_struct *qp_tx_thread[MAX_CPU]; > >> +struct crypto_shash *siw_crypto_shash; > >> + > >> +static ssize_t show_sw_version(struct device *dev, > >> + struct device_attribute *attr, char *buf) > >> +{ > >> + struct siw_dev *sdev = container_of(dev, struct siw_dev, > >ofa_dev.dev); > > > >Please remove "ofa_*" from this code, upstream has nothing to do with > >OFA. > OK, sure.
> That came from the fact it felt wired to write an iWarp driver > and use ib_*. Shall we stick to history and name those things ib > even if we have now 4 transports, or can we come up with something > more generic? ofa obviously is not a good idea. We already have the rdma_* notation for non-IB protocols; in the long run, we will remove drivers/infiniband to be drivers/rdma. Thanks
On Sat, Oct 14, 2017 at 09:41:32AM +0300, Leon Romanovsky wrote: > We already have rdma_* notation for not-IB protocols, in long run, > we will remove drivers/infiniband to be drivers/rdma. s/remove/rename > > Thanks
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw_main.c | 752 > +++++++++++++++++++++++++++++++++++ > 1 file changed, 752 insertions(+) > create mode 100644 drivers/infiniband/sw/siw/siw_main.c > > diff --git a/drivers/infiniband/sw/siw/siw_main.c > b/drivers/infiniband/sw/siw/siw_main.c > new file mode 100644 > index 000000000000..5a054c6becaa > --- /dev/null > +++ b/drivers/infiniband/sw/siw/siw_main.c > @@ -0,0 +1,752 @@ > +/* > + * Software iWARP device driver for Linux > + * > + * Authors: Bernard Metzler <bmt@zurich.ibm.com> > + * > + * Copyright (c) 2008-2017, IBM Corporation > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * > + * - Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * - Neither the name of IBM nor the names of its contributors may be > + * used to endorse or promote products derived from this software without > + * specific prior written permission. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT > HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#include <linux/init.h> > +#include <linux/errno.h> > +#include <linux/netdevice.h> > +#include <linux/inetdevice.h> > +#include <net/net_namespace.h> > +#include <linux/rtnetlink.h> > +#include <linux/if_arp.h> > +#include <linux/list.h> > +#include <linux/kernel.h> > +#include <linux/dma-mapping.h> > + > +#include <rdma/ib_verbs.h> > +#include <rdma/ib_smi.h> > +#include <rdma/ib_user_verbs.h> > + > +#include "siw.h" > +#include "siw_obj.h" > +#include "siw_cm.h" > +#include "siw_verbs.h" > +#include <linux/kthread.h> > + > + > +MODULE_AUTHOR("Bernard Metzler"); > +MODULE_DESCRIPTION("Software iWARP Driver"); > +MODULE_LICENSE("Dual BSD/GPL"); > +MODULE_VERSION("0.2"); > + > +#define SIW_MAX_IF 12 > +static int if_cnt; > +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = '\0'}; > +module_param_array(iface_list, charp, &if_cnt, 0444); > +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if present"); > + > +static bool loopback_enabled = 1; > +module_param(loopback_enabled, bool, 0644); > +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); (ignoring the fact that you will have to remove your module options)... Why do you have a knob to disable loopback? > + > +LIST_HEAD(siw_devlist); > + > +static int cpu_cnt; > +static char *tx_cpu_list[MAX_CPU] = {[0 ... 
(MAX_CPU-1)] = '\0'}; > +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); > +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be bound to"); > + > +int default_tx_cpu = -1; > +struct task_struct *qp_tx_thread[MAX_CPU]; > +struct crypto_shash *siw_crypto_shash; > + > +static ssize_t show_sw_version(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct siw_dev *sdev = container_of(dev, struct siw_dev, ofa_dev.dev); > + > + return sprintf(buf, "%x\n", sdev->attrs.version); > +} > + > +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL); > + > +static struct device_attribute *siw_dev_attributes[] = { > + &dev_attr_sw_version > +}; > + > +static void siw_device_release(struct device *dev) > +{ > + pr_info("%s device released\n", dev_name(dev)); > +} > + > +static struct device siw_generic_dma_device = { > + .dma_ops = &siw_dma_generic_ops, > + .init_name = "software-rdma-v2", > + .release = siw_device_release > +}; > + > +static struct bus_type siw_bus = { > + .name = "siw", > +}; > + > +static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int mask, > + struct ib_port_modify *props) > +{ > + return -EOPNOTSUPP; > +} > + > + > +static void siw_device_register(struct siw_dev *sdev) > +{ > + struct ib_device *ofa_dev = &sdev->ofa_dev; > + int rv, i; > + static int dev_id = 1; > + > + rv = ib_register_device(ofa_dev, NULL); > + if (rv) { > + dprint(DBG_DM|DBG_ON, " %s: ib register error: rv=%d\n", > + ofa_dev->name, rv); > + return; > + } > + > + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { > + rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); > + if (rv) { > + dprint(DBG_DM|DBG_ON, " %s: create file error: > rv=%d\n", > + ofa_dev->name, rv); > + ib_unregister_device(ofa_dev); > + return; > + } > + } > + siw_debugfs_add_device(sdev); > + > + sdev->attrs.vendor_part_id = dev_id++; > + > + dprint(DBG_DM, ": '%s' at '%s', > HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", > + ofa_dev->name, 
sdev->netdev->name, > + *(u8 *)sdev->netdev->dev_addr, > + *((u8 *)sdev->netdev->dev_addr + 1), > + *((u8 *)sdev->netdev->dev_addr + 2), > + *((u8 *)sdev->netdev->dev_addr + 3), > + *((u8 *)sdev->netdev->dev_addr + 4), > + *((u8 *)sdev->netdev->dev_addr + 5)); > + > + sdev->is_registered = 1; > +} > + > +static void siw_device_deregister(struct siw_dev *sdev) > +{ > + int i; > + > + siw_debugfs_del_device(sdev); > + > + if (sdev->is_registered) { > + > + dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, > + sdev->netdev->name); > + > + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) > + device_remove_file(&sdev->ofa_dev.dev, > + siw_dev_attributes[i]); > + > + ib_unregister_device(&sdev->ofa_dev); > + } > + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || > + atomic_read(&sdev->num_mem) || atomic_read(&sdev->num_cep) || > + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || > + atomic_read(&sdev->num_pd)) { > + pr_warn("SIW at %s: orphaned resources!\n", sdev->netdev- > >name); > + pr_warn("CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD > %d\n", > + atomic_read(&sdev->num_ctx), > + atomic_read(&sdev->num_srq), > + atomic_read(&sdev->num_qp), > + atomic_read(&sdev->num_cq), > + atomic_read(&sdev->num_mem), > + atomic_read(&sdev->num_cep), > + atomic_read(&sdev->num_pd)); > + } > + i = 0; > + > + while (!list_empty(&sdev->cep_list)) { > + struct siw_cep *cep = list_entry(sdev->cep_list.next, > + struct siw_cep, devq); > + list_del(&cep->devq); > + dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", > + cep, cep->state); > + kfree(cep); > + i++; > + } > + if (i) > + pr_warn("%s: free'd %d CEPs\n", __func__, i); > + > + sdev->is_registered = 0; > +} > + > +static void siw_device_destroy(struct siw_dev *sdev) > +{ > + dprint(DBG_DM, ": destroy siw device at %s\n", sdev->netdev->name); > + > + siw_idr_release(sdev); > + kfree(sdev->ofa_dev.iwcm); > + dev_put(sdev->netdev); > + ib_dealloc_device(&sdev->ofa_dev); > +} > + > + > 
+static int siw_match_iflist(struct net_device *dev) > +{ > + int i; > + > + if (if_cnt == 0) > + return 1; > + > + if_cnt = min_t(int, SIW_MAX_IF, if_cnt); > + > + for (i = 0; i < if_cnt; i++) > + if (!strcmp(iface_list[i], dev->name)) > + return 1; > + return 0; > +} > + > +static struct siw_dev *siw_dev_from_netdev(struct net_device *dev) > +{ > + if (!list_empty(&siw_devlist)) { > + struct list_head *pos; > + > + list_for_each(pos, &siw_devlist) { > + struct siw_dev *sdev = > + list_entry(pos, struct siw_dev, list); > + if (sdev->netdev == dev) > + return sdev; > + } > + } > + return NULL; > +} > + > +static int siw_tx_qualified(int cpu) > +{ > + int i; > + > + if (cpu_cnt == 0) > + return 1; > + > + for (i = 0; i < cpu_cnt; i++) { > + int new_cpu; > + > + if (kstrtoint(tx_cpu_list[i], 0, &new_cpu)) > + continue; > + if (cpu == new_cpu) > + return 1; > + } > + return 0; > +} > + > +static int siw_create_tx_threads(int max_threads, int check_qualified) > +{ > + int cpu, rv, assigned = 0; > + > + if (max_threads < 0 || max_threads > MAX_CPU) > + return 0; > + > + for_each_online_cpu(cpu) { > + if (siw_tx_qualified(cpu)) { > + qp_tx_thread[cpu] = > + kthread_create(siw_run_sq, > + (unsigned long *)(long)cpu, > + "qp_tx_thread/%d", cpu); > + kthread_bind(qp_tx_thread[cpu], cpu); > + if (IS_ERR(qp_tx_thread)) { > + rv = PTR_ERR(qp_tx_thread); > + qp_tx_thread[cpu] = NULL; > + pr_info("Binding TX thread to CPU %d failed", > + cpu); > + break; > + } > + wake_up_process(qp_tx_thread[cpu]); > + assigned++; > + if (default_tx_cpu < 0) > + default_tx_cpu = cpu; > + if (assigned >= max_threads) > + break; > + } > + } > + return assigned; > +} > + > +static int siw_dev_qualified(struct net_device *netdev) > +{ > + if (!siw_match_iflist(netdev)) { > + dprint(DBG_DM, ": %s (not selected)\n", > + netdev->name); > + return 0; > + } > + /* > + * Additional hardware support can be added here > + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) 
- see > + * <linux/if_arp.h> for type identifiers. > + */ > + if (netdev->type == ARPHRD_ETHER || > + netdev->type == ARPHRD_IEEE802 || > + netdev->type == ARPHRD_INFINIBAND || > + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) > + return 1; > + iWARP over IPoIB/Infiniband? :) > + return 0; > +} > + > +static void siw_verbs_sq_flush(struct ib_qp *ofa_qp) > +{ > + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); > + > + down_write(&qp->state_lock); > + siw_sq_flush(qp); > + up_write(&qp->state_lock); > +} > + > +static void siw_verbs_rq_flush(struct ib_qp *ofa_qp) > +{ > + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); > + > + down_write(&qp->state_lock); > + siw_rq_flush(qp); > + up_write(&qp->state_lock); > +} > + > +static struct ib_ah *siw_create_ah(struct ib_pd *pd, struct rdma_ah_attr *attr, > + struct ib_udata *udata) > +{ > + return ERR_PTR(-EOPNOTSUPP); > +} > + > +static int siw_destroy_ah(struct ib_ah *ah) > +{ > + return -EOPNOTSUPP; > +} > + > + > +static struct siw_dev *siw_device_create(struct net_device *netdev) > +{ > + struct siw_dev *sdev = (struct siw_dev *)ib_alloc_device(sizeof(*sdev)); > + struct ib_device *ofa_dev; > + > + if (!sdev) > + goto out; > + > + ofa_dev = &sdev->ofa_dev; > + > + ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); > + if (!ofa_dev->iwcm) { > + ib_dealloc_device(ofa_dev); > + sdev = NULL; > + goto out; > + } > + > + sdev->netdev = netdev; > + list_add_tail(&sdev->list, &siw_devlist); > + > + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); > + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, > + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); > + > + memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); > + if (netdev->type != ARPHRD_LOOPBACK) > + memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); > + else { > + /* > + * The loopback device does not have a HW address, > + * but connection mangagement lib expects gid != 0 > + */ > + size_t gidlen = min_t(size_t, strlen(ofa_dev->name), 6); > + > 
+ memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); > + } > + ofa_dev->owner = THIS_MODULE; > + > + ofa_dev->uverbs_cmd_mask = > + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | > + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | > + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | > + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | > + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | > + (1ull << IB_USER_VERBS_CMD_REG_MR) | > + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | > + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | > + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | > + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | > + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | > + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | > + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | > + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | > + (1ull << IB_USER_VERBS_CMD_POST_SEND) | > + (1ull << IB_USER_VERBS_CMD_POST_RECV) | > + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | > + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | > + (1ull << IB_USER_VERBS_CMD_REG_MR) | > + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | > + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); > + > + ofa_dev->node_type = RDMA_NODE_RNIC; > + memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, > + sizeof(SIW_NODE_DESC_COMMON)); > + > + /* > + * Current model (one-to-one device association): > + * One Softiwarp device per net_device or, equivalently, > + * per physical port. 
> + */ > + ofa_dev->phys_port_cnt = 1; > + > + ofa_dev->num_comp_vectors = num_possible_cpus(); > + ofa_dev->dev.parent = &siw_generic_dma_device; > + ofa_dev->query_device = siw_query_device; > + ofa_dev->query_port = siw_query_port; > + ofa_dev->get_port_immutable = siw_get_port_immutable; > + ofa_dev->query_qp = siw_query_qp; > + ofa_dev->modify_port = siw_modify_port; > + ofa_dev->query_pkey = siw_query_pkey; > + ofa_dev->query_gid = siw_query_gid; > + ofa_dev->alloc_ucontext = siw_alloc_ucontext; > + ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; > + ofa_dev->mmap = siw_mmap; > + ofa_dev->alloc_pd = siw_alloc_pd; > + ofa_dev->dealloc_pd = siw_dealloc_pd; > + ofa_dev->create_ah = siw_create_ah; > + ofa_dev->destroy_ah = siw_destroy_ah; > + ofa_dev->create_qp = siw_create_qp; > + ofa_dev->modify_qp = siw_verbs_modify_qp; > + ofa_dev->destroy_qp = siw_destroy_qp; > + ofa_dev->create_cq = siw_create_cq; > + ofa_dev->destroy_cq = siw_destroy_cq; > + ofa_dev->resize_cq = NULL; > + ofa_dev->poll_cq = siw_poll_cq; > + ofa_dev->get_dma_mr = siw_get_dma_mr; > + ofa_dev->reg_user_mr = siw_reg_user_mr; > + ofa_dev->dereg_mr = siw_dereg_mr; > + ofa_dev->alloc_mr = siw_alloc_mr; > + ofa_dev->map_mr_sg = siw_map_mr_sg; > + ofa_dev->dealloc_mw = NULL; > + > + ofa_dev->create_srq = siw_create_srq; > + ofa_dev->modify_srq = siw_modify_srq; > + ofa_dev->query_srq = siw_query_srq; > + ofa_dev->destroy_srq = siw_destroy_srq; > + ofa_dev->post_srq_recv = siw_post_srq_recv; > + > + ofa_dev->attach_mcast = NULL; > + ofa_dev->detach_mcast = NULL; > + ofa_dev->process_mad = siw_no_mad; > + > + ofa_dev->req_notify_cq = siw_req_notify_cq; > + ofa_dev->post_send = siw_post_send; > + ofa_dev->post_recv = siw_post_receive; > + > + ofa_dev->drain_sq = siw_verbs_sq_flush; > + ofa_dev->drain_rq = siw_verbs_rq_flush; > + > + ofa_dev->dev.dma_ops = &dma_virt_ops; > + > + ofa_dev->iwcm->connect = siw_connect; > + ofa_dev->iwcm->accept = siw_accept; > + ofa_dev->iwcm->reject = siw_reject; > + 
ofa_dev->iwcm->create_listen = siw_create_listen; > + ofa_dev->iwcm->destroy_listen = siw_destroy_listen; > + ofa_dev->iwcm->add_ref = siw_qp_get_ref; > + ofa_dev->iwcm->rem_ref = siw_qp_put_ref; > + ofa_dev->iwcm->get_qp = siw_get_ofaqp; > + > + sdev->attrs.version = VERSION_ID_SOFTIWARP; > + sdev->attrs.vendor_id = SIW_VENDOR_ID; > + sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; > + sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; > + sdev->attrs.max_qp = SIW_MAX_QP; > + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; > + sdev->attrs.max_ord = SIW_MAX_ORD_QP; > + sdev->attrs.max_ird = SIW_MAX_IRD_QP; > + sdev->attrs.cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; > + sdev->attrs.max_sge = SIW_MAX_SGE; > + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; > + sdev->attrs.max_cq = SIW_MAX_CQ; > + sdev->attrs.max_cqe = SIW_MAX_CQE; > + sdev->attrs.max_mr = SIW_MAX_MR; > + sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); > + sdev->attrs.max_pd = SIW_MAX_PD; > + sdev->attrs.max_mw = SIW_MAX_MW; > + sdev->attrs.max_fmr = SIW_MAX_FMR; > + sdev->attrs.max_srq = SIW_MAX_SRQ; > + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; > + sdev->attrs.max_srq_sge = SIW_MAX_SGE; > + > + siw_idr_init(sdev); > + INIT_LIST_HEAD(&sdev->cep_list); > + INIT_LIST_HEAD(&sdev->qp_list); > + > + atomic_set(&sdev->num_ctx, 0); > + atomic_set(&sdev->num_srq, 0); > + atomic_set(&sdev->num_qp, 0); > + atomic_set(&sdev->num_cq, 0); > + atomic_set(&sdev->num_mem, 0); > + atomic_set(&sdev->num_pd, 0); > + atomic_set(&sdev->num_cep, 0); > + > + sdev->is_registered = 0; > +out: > + if (sdev) > + dev_hold(netdev); > + > + return sdev; > +} > + > + > + > +static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > + void *arg) > +{ > + struct net_device *netdev = netdev_notifier_info_to_dev(arg); > + struct in_device *in_dev; > + struct siw_dev *sdev; > + > + dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); > + > + if (dev_net(netdev) != &init_net) > + goto done; > + > + sdev = 
siw_dev_from_netdev(netdev); > + > + switch (event) { > + > + case NETDEV_UP: > + if (!sdev) > + break; > + > + if (sdev->is_registered) { > + sdev->state = IB_PORT_ACTIVE; > + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); > + break; > + } > + > + in_dev = in_dev_get(netdev); > + if (!in_dev) { > + dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); > + sdev->state = IB_PORT_INIT; > + break; > + } > + > + if (in_dev->ifa_list) { > + sdev->state = IB_PORT_ACTIVE; > + siw_device_register(sdev); > + } else { > + dprint(DBG_DM, ": %s: no ifa\n", netdev->name); > + sdev->state = IB_PORT_INIT; > + } > + in_dev_put(in_dev); > + > + break; > + > + case NETDEV_DOWN: > + if (sdev && sdev->is_registered) { > + sdev->state = IB_PORT_DOWN; > + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); > + break; > + } > + break; > + > + case NETDEV_REGISTER: > + if (!sdev) { > + if (!siw_dev_qualified(netdev)) > + break; > + > + sdev = siw_device_create(netdev); > + if (sdev) { > + sdev->state = IB_PORT_INIT; > + dprint(DBG_DM, ": new siw device for %s\n", > + netdev->name); > + } > + } > + break; > + > + case NETDEV_UNREGISTER: > + if (sdev) { > + if (sdev->is_registered) > + siw_device_deregister(sdev); > + list_del(&sdev->list); > + siw_device_destroy(sdev); > + } > + break; > + > + case NETDEV_CHANGEADDR: > + if (sdev->is_registered) > + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); > + > + break; > + /* > + * Todo: Below netdev events are currently not handled. > + */ > + case NETDEV_CHANGEMTU: > + case NETDEV_GOING_DOWN: > + case NETDEV_CHANGE: Probably need to handle mtu changes and mac address changes... 
> + > + break; > + > + default: > + break; > + } > +done: > + return NOTIFY_OK; > +} > + > +static struct notifier_block siw_netdev_nb = { > + .notifier_call = siw_netdev_event, > +}; > + > +/* > + * siw_init_module - Initialize Softiwarp module and register with netdev > + * subsystem to create Softiwarp devices per net_device > + */ > +static __init int siw_init_module(void) > +{ > + int rv; > + int nr_cpu; > + > + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { > + pr_info("siw: sendpage threshold too small: %u\n", > + (int)SENDPAGE_THRESH); > + rv = EINVAL; > + goto out; > + } Should this be a compile time failure? BUILD_BUG_ON()? > + /* > + * The xprtrdma module needs at least some rudimentary bus to set > + * some devices path MTU. > + */ > + rv = bus_register(&siw_bus); > + if (rv) > + goto out_nobus; > + > + siw_generic_dma_device.bus = &siw_bus; > + > + rv = device_register(&siw_generic_dma_device); > + if (rv) > + goto out; > + > + rv = siw_cm_init(); > + if (rv) > + goto out_unregister; > + > + if (DPRINT_MASK) > + siw_debug_init(); > + > + /* > + * Allocate CRC SHASH object. 
Fail loading siw only, if CRC is > + * required by kernel module > + */ > + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); > + if (IS_ERR(siw_crypto_shash)) { > + pr_info("siw: Loading CRC32c failed: %ld\n", > + PTR_ERR(siw_crypto_shash)); > + siw_crypto_shash = NULL; > + if (mpa_crc_required == true) > + goto out_unregister; > + } > + rv = register_netdevice_notifier(&siw_netdev_nb); > + if (rv) { > + siw_debugfs_delete(); > + goto out_unregister; > + } > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) > + qp_tx_thread[nr_cpu] = NULL; > + > + if (siw_create_tx_threads(MAX_CPU, 1) == 0) { > + pr_info("Try starting default TX thread\n"); > + if (siw_create_tx_threads(1, 0) == 0) { > + pr_info("Could not start any TX thread\n"); > + goto out_unregister; > + } > + } > + pr_info("SoftiWARP attached\n"); > + return 0; > + > +out_unregister: > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { > + if (qp_tx_thread[nr_cpu]) { > + siw_stop_tx_thread(nr_cpu); > + qp_tx_thread[nr_cpu] = NULL; > + } > + } > + device_unregister(&siw_generic_dma_device); > + > + if (siw_crypto_shash) > + crypto_free_shash(siw_crypto_shash); > +out: > + bus_unregister(&siw_bus); > +out_nobus: > + pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); > + siw_cm_exit(); > + > + return rv; > +} > + > + > +static void __exit siw_exit_module(void) > +{ > + int nr_cpu; > + > + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { > + if (qp_tx_thread[nr_cpu]) { > + siw_stop_tx_thread(nr_cpu); > + qp_tx_thread[nr_cpu] = NULL; > + } > + } > + unregister_netdevice_notifier(&siw_netdev_nb); > + > + siw_cm_exit(); > + > + while (!list_empty(&siw_devlist)) { > + struct siw_dev *sdev = > + list_entry(siw_devlist.next, struct siw_dev, list); > + list_del(&sdev->list); > + if (sdev->is_registered) > + siw_device_deregister(sdev); > + > + siw_device_destroy(sdev); > + } > + if (siw_crypto_shash) > + crypto_free_shash(siw_crypto_shash); > + > + siw_debugfs_delete(); > + > + device_unregister(&siw_generic_dma_device); > + > + bus_unregister(&siw_bus); > + > + pr_info("SoftiWARP detached\n"); > +} > + > +module_init(siw_init_module); > +module_exit(siw_exit_module); > -- > 2.13.6 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Oct 19, 2017 at 11:53:45AM -0500, Steve Wise wrote: > > +#define SIW_MAX_IF 12 > > +static int if_cnt; > > +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = '\0'}; > > +module_param_array(iface_list, charp, &if_cnt, 0444); > > +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if > > present"); Also, NAK on this. I know this is what rxe does, but we shouldn't have allowed that either. Add an RDMA-netlink thing to manage siw attach/detach. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
-----Leon Romanovsky <leon@kernel.org> wrote: ----- >To: Bernard Metzler <bmt@zurich.ibm.com> >From: Leon Romanovsky <leon@kernel.org> >Date: 10/08/2017 03:03PM >Cc: linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Fri, Oct 06, 2017 at 08:28:43AM -0400, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >> --- >> drivers/infiniband/sw/siw/siw_main.c | 752 >+++++++++++++++++++++++++++++++++++ >> 1 file changed, 752 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_main.c >> >> diff --git a/drivers/infiniband/sw/siw/siw_main.c >b/drivers/infiniband/sw/siw/siw_main.c >> new file mode 100644 >> index 000000000000..5a054c6becaa >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_main.c >> @@ -0,0 +1,752 @@ >> +/* >> + * Software iWARP device driver for Linux >> + * >> + * Authors: Bernard Metzler <bmt@zurich.ibm.com> >> + * >> + * Copyright (c) 2008-2017, IBM Corporation >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the >GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the >following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >copyright notice, >> + * this list of conditions and the following disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >copyright >> + * notice, this list of conditions and the following >disclaimer in the >> + * documentation and/or other materials provided with the >distribution. 
>> + * >> + * - Neither the name of IBM nor the names of its contributors >may be >> + * used to endorse or promote products derived from this >software without >> + * specific prior written permission. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >THE >> + * SOFTWARE. >> + */ >> + >> +#include <linux/init.h> >> +#include <linux/errno.h> >> +#include <linux/netdevice.h> >> +#include <linux/inetdevice.h> >> +#include <net/net_namespace.h> >> +#include <linux/rtnetlink.h> >> +#include <linux/if_arp.h> >> +#include <linux/list.h> >> +#include <linux/kernel.h> >> +#include <linux/dma-mapping.h> >> + >> +#include <rdma/ib_verbs.h> >> +#include <rdma/ib_smi.h> >> +#include <rdma/ib_user_verbs.h> >> + >> +#include "siw.h" >> +#include "siw_obj.h" >> +#include "siw_cm.h" >> +#include "siw_verbs.h" >> +#include <linux/kthread.h> >> + >> + >> +MODULE_AUTHOR("Bernard Metzler"); >> +MODULE_DESCRIPTION("Software iWARP Driver"); >> +MODULE_LICENSE("Dual BSD/GPL"); >> +MODULE_VERSION("0.2"); > >No module versions please, it is useless. > >> + >> +#define SIW_MAX_IF 12 >> +static int if_cnt; >> +static char *iface_list[SIW_MAX_IF] = {[0 ... 
(SIW_MAX_IF-1)] = >'\0'}; >> +module_param_array(iface_list, charp, &if_cnt, 0444); >> +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if >present"); >> + >> +static bool loopback_enabled = 1; >> +module_param(loopback_enabled, bool, 0644); >> +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); >> + >> +LIST_HEAD(siw_devlist); >> + >> +static int cpu_cnt; >> +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = '\0'}; >> +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); >> +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be >bound to"); > >No module parameters please. > >> + >> +int default_tx_cpu = -1; >> +struct task_struct *qp_tx_thread[MAX_CPU]; >> +struct crypto_shash *siw_crypto_shash; >> + >> +static ssize_t show_sw_version(struct device *dev, >> + struct device_attribute *attr, char *buf) >> +{ >> + struct siw_dev *sdev = container_of(dev, struct siw_dev, >ofa_dev.dev); > >Please remove "ofa_*" from this code, upstream has nothing to do with >OFA. > >> + >> + return sprintf(buf, "%x\n", sdev->attrs.version); >> +} >> + >> +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL); > >Why do you need? Does "rdma dev" work for you? > >> + >> +static struct device_attribute *siw_dev_attributes[] = { >> + &dev_attr_sw_version >> +}; >> + >> +static void siw_device_release(struct device *dev) >> +{ >> + pr_info("%s device released\n", dev_name(dev)); >> +} >> + >> +static struct device siw_generic_dma_device = { >> + .dma_ops = &siw_dma_generic_ops, >> + .init_name = "software-rdma-v2", >> + .release = siw_device_release >> +}; >> + >> +static struct bus_type siw_bus = { >> + .name = "siw", >> +}; >> + >> +static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int >mask, >> + struct ib_port_modify *props) >> +{ >> + return -EOPNOTSUPP; >> +} > >The proper error code is ENOSYS and if the function is not >implemented, >it shouldn't be set. 
> >> + >> + >> +static void siw_device_register(struct siw_dev *sdev) >> +{ >> + struct ib_device *ofa_dev = &sdev->ofa_dev; > >It is Linux kernel code and not OFED. > >> + int rv, i; >> + static int dev_id = 1; >> + >> + rv = ib_register_device(ofa_dev, NULL); >> + if (rv) { >> + dprint(DBG_DM|DBG_ON, " %s: ib register error: rv=%d\n", >> + ofa_dev->name, rv); >> + return; >> + } >> + >> + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { >> + rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); >> + if (rv) { >> + dprint(DBG_DM|DBG_ON, " %s: create file error: rv=%d\n", >> + ofa_dev->name, rv); >> + ib_unregister_device(ofa_dev); >> + return; >> + } >> + } >> + siw_debugfs_add_device(sdev); >> + >> + sdev->attrs.vendor_part_id = dev_id++; >> + >> + dprint(DBG_DM, ": '%s' at '%s', >HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", >> + ofa_dev->name, sdev->netdev->name, >> + *(u8 *)sdev->netdev->dev_addr, >> + *((u8 *)sdev->netdev->dev_addr + 1), >> + *((u8 *)sdev->netdev->dev_addr + 2), >> + *((u8 *)sdev->netdev->dev_addr + 3), >> + *((u8 *)sdev->netdev->dev_addr + 4), >> + *((u8 *)sdev->netdev->dev_addr + 5)); >> + >> + sdev->is_registered = 1; >> +} >> + >> +static void siw_device_deregister(struct siw_dev *sdev) >> +{ >> + int i; >> + >> + siw_debugfs_del_device(sdev); >> + >> + if (sdev->is_registered) { >> + >> + dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, >> + sdev->netdev->name); >> + >> + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) >> + device_remove_file(&sdev->ofa_dev.dev, >> + siw_dev_attributes[i]); >> + >> + ib_unregister_device(&sdev->ofa_dev); >> + } >> + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || >> + atomic_read(&sdev->num_mem) || atomic_read(&sdev->num_cep) || >> + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || >> + atomic_read(&sdev->num_pd)) { >> + pr_warn("SIW at %s: orphaned resources!\n", sdev->netdev->name); >> + pr_warn("CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD 
%d\n", >> + atomic_read(&sdev->num_ctx), >> + atomic_read(&sdev->num_srq), >> + atomic_read(&sdev->num_qp), >> + atomic_read(&sdev->num_cq), >> + atomic_read(&sdev->num_mem), >> + atomic_read(&sdev->num_cep), >> + atomic_read(&sdev->num_pd)); >> + } >> + i = 0; >> + >> + while (!list_empty(&sdev->cep_list)) { >> + struct siw_cep *cep = list_entry(sdev->cep_list.next, >> + struct siw_cep, devq); >> + list_del(&cep->devq); >> + dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", >> + cep, cep->state); >> + kfree(cep); >> + i++; >> + } >> + if (i) >> + pr_warn("%s: free'd %d CEPs\n", __func__, i); >> + >> + sdev->is_registered = 0; >> +} >> + >> +static void siw_device_destroy(struct siw_dev *sdev) >> +{ >> + dprint(DBG_DM, ": destroy siw device at %s\n", >sdev->netdev->name); >> + >> + siw_idr_release(sdev); >> + kfree(sdev->ofa_dev.iwcm); >> + dev_put(sdev->netdev); >> + ib_dealloc_device(&sdev->ofa_dev); >> +} >> + >> + >> +static int siw_match_iflist(struct net_device *dev) >> +{ >> + int i; >> + >> + if (if_cnt == 0) > >No need to be explicit with "== 0". 
> >> + return 1; >> + >> + if_cnt = min_t(int, SIW_MAX_IF, if_cnt); >> + >> + for (i = 0; i < if_cnt; i++) >> + if (!strcmp(iface_list[i], dev->name)) >> + return 1; >> + return 0; >> +} >> + >> +static struct siw_dev *siw_dev_from_netdev(struct net_device *dev) >> +{ >> + if (!list_empty(&siw_devlist)) { >> + struct list_head *pos; >> + >> + list_for_each(pos, &siw_devlist) { >> + struct siw_dev *sdev = >> + list_entry(pos, struct siw_dev, list); >> + if (sdev->netdev == dev) >> + return sdev; >> + } >> + } >> + return NULL; >> +} >> + >> +static int siw_tx_qualified(int cpu) >> +{ >> + int i; >> + >> + if (cpu_cnt == 0) >> + return 1; >> + >> + for (i = 0; i < cpu_cnt; i++) { >> + int new_cpu; >> + >> + if (kstrtoint(tx_cpu_list[i], 0, &new_cpu)) >> + continue; >> + if (cpu == new_cpu) >> + return 1; >> + } >> + return 0; >> +} >> + >> +static int siw_create_tx_threads(int max_threads, int >check_qualified) >> +{ >> + int cpu, rv, assigned = 0; >> + >> + if (max_threads < 0 || max_threads > MAX_CPU) >> + return 0; >> + >> + for_each_online_cpu(cpu) { >> + if (siw_tx_qualified(cpu)) { >> + qp_tx_thread[cpu] = >> + kthread_create(siw_run_sq, >> + (unsigned long *)(long)cpu, >> + "qp_tx_thread/%d", cpu); > >You should have very good reasons to create kernel threads and >especially for each online CPU. 
> >> + kthread_bind(qp_tx_thread[cpu], cpu); >> + if (IS_ERR(qp_tx_thread)) { >> + rv = PTR_ERR(qp_tx_thread); >> + qp_tx_thread[cpu] = NULL; >> + pr_info("Binding TX thread to CPU %d failed", >> + cpu); >> + break; >> + } >> + wake_up_process(qp_tx_thread[cpu]); >> + assigned++; >> + if (default_tx_cpu < 0) >> + default_tx_cpu = cpu; >> + if (assigned >= max_threads) >> + break; >> + } >> + } >> + return assigned; >> +} >> + >> +static int siw_dev_qualified(struct net_device *netdev) >> +{ >> + if (!siw_match_iflist(netdev)) { >> + dprint(DBG_DM, ": %s (not selected)\n", >> + netdev->name); >> + return 0; >> + } >> + /* >> + * Additional hardware support can be added here >> + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see >> + * <linux/if_arp.h> for type identifiers. >> + */ >> + if (netdev->type == ARPHRD_ETHER || >> + netdev->type == ARPHRD_IEEE802 || >> + netdev->type == ARPHRD_INFINIBAND || >> + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) >> + return 1; >> + >> + return 0; >> +} >> + >> +static void siw_verbs_sq_flush(struct ib_qp *ofa_qp) >> +{ >> + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); >> + >> + down_write(&qp->state_lock); >> + siw_sq_flush(qp); >> + up_write(&qp->state_lock); >> +} >> + >> +static void siw_verbs_rq_flush(struct ib_qp *ofa_qp) >> +{ >> + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); >> + >> + down_write(&qp->state_lock); >> + siw_rq_flush(qp); >> + up_write(&qp->state_lock); >> +} >> + >> +static struct ib_ah *siw_create_ah(struct ib_pd *pd, struct >rdma_ah_attr *attr, >> + struct ib_udata *udata) >> +{ >> + return ERR_PTR(-EOPNOTSUPP); >> +} >> + >> +static int siw_destroy_ah(struct ib_ah *ah) >> +{ >> + return -EOPNOTSUPP; >> +} >> + > >ENOSYS for both. 
> >> + >> +static struct siw_dev *siw_device_create(struct net_device >*netdev) >> +{ >> + struct siw_dev *sdev = (struct siw_dev >*)ib_alloc_device(sizeof(*sdev)); >> + struct ib_device *ofa_dev; >> + >> + if (!sdev) >> + goto out; >> + >> + ofa_dev = &sdev->ofa_dev; >> + >> + ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); >> + if (!ofa_dev->iwcm) { >> + ib_dealloc_device(ofa_dev); >> + sdev = NULL; >> + goto out; >> + } >> + >> + sdev->netdev = netdev; >> + list_add_tail(&sdev->list, &siw_devlist); >> + >> + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); >> + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, >> + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); >> + >> + memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); >> + if (netdev->type != ARPHRD_LOOPBACK) >> + memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); >> + else { >> + /* >> + * The loopback device does not have a HW address, >> + * but connection mangagement lib expects gid != 0 >> + */ >> + size_t gidlen = min_t(size_t, strlen(ofa_dev->name), 6); >> + >> + memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); >> + } >> + ofa_dev->owner = THIS_MODULE; >> + >> + ofa_dev->uverbs_cmd_mask = >> + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | >> + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | >> + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | >> + (1ull << IB_USER_VERBS_CMD_REG_MR) | >> + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | >> + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | >> + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | >> + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | >> + (1ull << IB_USER_VERBS_CMD_POST_SEND) | >> + (1ull << 
IB_USER_VERBS_CMD_POST_RECV) | >> + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | >> + (1ull << IB_USER_VERBS_CMD_REG_MR) | >> + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | >> + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); >> + >> + ofa_dev->node_type = RDMA_NODE_RNIC; >> + memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, >> + sizeof(SIW_NODE_DESC_COMMON)); >> + >> + /* >> + * Current model (one-to-one device association): >> + * One Softiwarp device per net_device or, equivalently, >> + * per physical port. >> + */ >> + ofa_dev->phys_port_cnt = 1; >> + >> + ofa_dev->num_comp_vectors = num_possible_cpus(); >> + ofa_dev->dev.parent = &siw_generic_dma_device; >> + ofa_dev->query_device = siw_query_device; >> + ofa_dev->query_port = siw_query_port; >> + ofa_dev->get_port_immutable = siw_get_port_immutable; >> + ofa_dev->query_qp = siw_query_qp; >> + ofa_dev->modify_port = siw_modify_port; >> + ofa_dev->query_pkey = siw_query_pkey; >> + ofa_dev->query_gid = siw_query_gid; >> + ofa_dev->alloc_ucontext = siw_alloc_ucontext; >> + ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; >> + ofa_dev->mmap = siw_mmap; >> + ofa_dev->alloc_pd = siw_alloc_pd; >> + ofa_dev->dealloc_pd = siw_dealloc_pd; >> + ofa_dev->create_ah = siw_create_ah; >> + ofa_dev->destroy_ah = siw_destroy_ah; >> + ofa_dev->create_qp = siw_create_qp; >> + ofa_dev->modify_qp = siw_verbs_modify_qp; >> + ofa_dev->destroy_qp = siw_destroy_qp; >> + ofa_dev->create_cq = siw_create_cq; >> + ofa_dev->destroy_cq = siw_destroy_cq; >> + ofa_dev->resize_cq = NULL; > >No need to set NULL. 
> >> + ofa_dev->poll_cq = siw_poll_cq; >> + ofa_dev->get_dma_mr = siw_get_dma_mr; >> + ofa_dev->reg_user_mr = siw_reg_user_mr; >> + ofa_dev->dereg_mr = siw_dereg_mr; >> + ofa_dev->alloc_mr = siw_alloc_mr; >> + ofa_dev->map_mr_sg = siw_map_mr_sg; >> + ofa_dev->dealloc_mw = NULL; >> + >> + ofa_dev->create_srq = siw_create_srq; >> + ofa_dev->modify_srq = siw_modify_srq; >> + ofa_dev->query_srq = siw_query_srq; >> + ofa_dev->destroy_srq = siw_destroy_srq; >> + ofa_dev->post_srq_recv = siw_post_srq_recv; >> + >> + ofa_dev->attach_mcast = NULL; >> + ofa_dev->detach_mcast = NULL; >> + ofa_dev->process_mad = siw_no_mad; >> + >> + ofa_dev->req_notify_cq = siw_req_notify_cq; >> + ofa_dev->post_send = siw_post_send; >> + ofa_dev->post_recv = siw_post_receive; >> + >> + ofa_dev->drain_sq = siw_verbs_sq_flush; >> + ofa_dev->drain_rq = siw_verbs_rq_flush; >> + >> + ofa_dev->dev.dma_ops = &dma_virt_ops; >> + >> + ofa_dev->iwcm->connect = siw_connect; >> + ofa_dev->iwcm->accept = siw_accept; >> + ofa_dev->iwcm->reject = siw_reject; >> + ofa_dev->iwcm->create_listen = siw_create_listen; >> + ofa_dev->iwcm->destroy_listen = siw_destroy_listen; >> + ofa_dev->iwcm->add_ref = siw_qp_get_ref; >> + ofa_dev->iwcm->rem_ref = siw_qp_put_ref; >> + ofa_dev->iwcm->get_qp = siw_get_ofaqp; >> + >> + sdev->attrs.version = VERSION_ID_SOFTIWARP; >> + sdev->attrs.vendor_id = SIW_VENDOR_ID; >> + sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; >> + sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; >> + sdev->attrs.max_qp = SIW_MAX_QP; >> + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; >> + sdev->attrs.max_ord = SIW_MAX_ORD_QP; >> + sdev->attrs.max_ird = SIW_MAX_IRD_QP; >> + sdev->attrs.cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; >> + sdev->attrs.max_sge = SIW_MAX_SGE; >> + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; >> + sdev->attrs.max_cq = SIW_MAX_CQ; >> + sdev->attrs.max_cqe = SIW_MAX_CQE; >> + sdev->attrs.max_mr = SIW_MAX_MR; >> + sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); >> + sdev->attrs.max_pd = 
SIW_MAX_PD; >> + sdev->attrs.max_mw = SIW_MAX_MW; >> + sdev->attrs.max_fmr = SIW_MAX_FMR; >> + sdev->attrs.max_srq = SIW_MAX_SRQ; >> + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; >> + sdev->attrs.max_srq_sge = SIW_MAX_SGE; >> + >> + siw_idr_init(sdev); >> + INIT_LIST_HEAD(&sdev->cep_list); >> + INIT_LIST_HEAD(&sdev->qp_list); >> + >> + atomic_set(&sdev->num_ctx, 0); >> + atomic_set(&sdev->num_srq, 0); >> + atomic_set(&sdev->num_qp, 0); >> + atomic_set(&sdev->num_cq, 0); >> + atomic_set(&sdev->num_mem, 0); >> + atomic_set(&sdev->num_pd, 0); >> + atomic_set(&sdev->num_cep, 0); >> + >> + sdev->is_registered = 0; >> +out: >> + if (sdev) >> + dev_hold(netdev); >> + >> + return sdev; >> +} >> + >> + >> + >> +static int siw_netdev_event(struct notifier_block *nb, unsigned >long event, >> + void *arg) >> +{ >> + struct net_device *netdev = netdev_notifier_info_to_dev(arg); >> + struct in_device *in_dev; >> + struct siw_dev *sdev; >> + >> + dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); >> + >> + if (dev_net(netdev) != &init_net) >> + goto done; >> + >> + sdev = siw_dev_from_netdev(netdev); >> + >> + switch (event) { >> + >> + case NETDEV_UP: >> + if (!sdev) >> + break; >> + >> + if (sdev->is_registered) { >> + sdev->state = IB_PORT_ACTIVE; >> + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); >> + break; >> + } >> + >> + in_dev = in_dev_get(netdev); >> + if (!in_dev) { >> + dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); >> + sdev->state = IB_PORT_INIT; >> + break; >> + } >> + >> + if (in_dev->ifa_list) { >> + sdev->state = IB_PORT_ACTIVE; >> + siw_device_register(sdev); >> + } else { >> + dprint(DBG_DM, ": %s: no ifa\n", netdev->name); >> + sdev->state = IB_PORT_INIT; >> + } >> + in_dev_put(in_dev); >> + >> + break; >> + >> + case NETDEV_DOWN: >> + if (sdev && sdev->is_registered) { >> + sdev->state = IB_PORT_DOWN; >> + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); >> + break; >> + } >> + break; >> + >> + case NETDEV_REGISTER: >> + if (!sdev) { >> + if 
(!siw_dev_qualified(netdev)) >> + break; >> + >> + sdev = siw_device_create(netdev); >> + if (sdev) { >> + sdev->state = IB_PORT_INIT; >> + dprint(DBG_DM, ": new siw device for %s\n", >> + netdev->name); >> + } >> + } >> + break; >> + >> + case NETDEV_UNREGISTER: >> + if (sdev) { >> + if (sdev->is_registered) >> + siw_device_deregister(sdev); >> + list_del(&sdev->list); >> + siw_device_destroy(sdev); >> + } >> + break; >> + >> + case NETDEV_CHANGEADDR: >> + if (sdev->is_registered) >> + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); >> + >> + break; >> + /* >> + * Todo: Below netdev events are currently not handled. >> + */ >> + case NETDEV_CHANGEMTU: >> + case NETDEV_GOING_DOWN: >> + case NETDEV_CHANGE: >> + >> + break; >> + >> + default: >> + break; >> + } >> +done: >> + return NOTIFY_OK; >> +} >> + >> +static struct notifier_block siw_netdev_nb = { >> + .notifier_call = siw_netdev_event, >> +}; >> + >> +/* >> + * siw_init_module - Initialize Softiwarp module and register with >netdev >> + * subsystem to create Softiwarp devices per >net_device >> + */ >> +static __init int siw_init_module(void) >> +{ >> + int rv; >> + int nr_cpu; >> + >> + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { >> + pr_info("siw: sendpage threshold too small: %u\n", >> + (int)SENDPAGE_THRESH); >> + rv = EINVAL; >> + goto out; >> + } >> + /* >> + * The xprtrdma module needs at least some rudimentary bus to set >> + * some devices path MTU. >> + */ >> + rv = bus_register(&siw_bus); > >bus register for the driver? no way. I admit - this looks ugly. And the comment above is incomplete. I did this mainly to allow siw to register with loopback devices. During device registration, we need to present a real parent device: device_add() needs it, as called by ib_device_register_sysfs(). The loopback device does not have a parent device, nor dma_ops. The dma_ops can be satisfied by global dma_virt_ops. But I did not find a similar 'virtual parent device' I could present during registration. 
So I 'invented' that one. I see no other way yet than dropping loopback device support, if this approach is not acceptable. But, we found it nice to have, since performance for the loopback case improved significantly. Any suggestion on how to enable loopback support (referencing 127.0.0.x addresses during connection management) in a more elegant way would be highly appreciated.... > >> + if (rv) >> + goto out_nobus; >> + >> + siw_generic_dma_device.bus = &siw_bus; >> + >> + rv = device_register(&siw_generic_dma_device); >> + if (rv) >> + goto out; >> + >> + rv = siw_cm_init(); >> + if (rv) >> + goto out_unregister; >> + >> + if (DPRINT_MASK) >> + siw_debug_init(); >> + >> + /* >> + * Allocate CRC SHASH object. Fail loading siw only, if CRC is >> + * required by kernel module >> + */ >> + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); >> + if (IS_ERR(siw_crypto_shash)) { >> + pr_info("siw: Loading CRC32c failed: %ld\n", >> + PTR_ERR(siw_crypto_shash)); >> + siw_crypto_shash = NULL; >> + if (mpa_crc_required == true) >> + goto out_unregister; >> + } >> + rv = register_netdevice_notifier(&siw_netdev_nb); >> + if (rv) { >> + siw_debugfs_delete(); >> + goto out_unregister; >> + } >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) >> + qp_tx_thread[nr_cpu] = NULL; >> + >> + if (siw_create_tx_threads(MAX_CPU, 1) == 0) { >> + pr_info("Try starting default TX thread\n"); >> + if (siw_create_tx_threads(1, 0) == 0) { >> + pr_info("Could not start any TX thread\n"); >> + goto out_unregister; >> + } >> + } >> + pr_info("SoftiWARP attached\n"); >> + return 0; >> + >> +out_unregister: >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { >> + if (qp_tx_thread[nr_cpu]) { >> + siw_stop_tx_thread(nr_cpu); >> + qp_tx_thread[nr_cpu] = NULL; >> + } >> + } >> + device_unregister(&siw_generic_dma_device); >> + >> + if (siw_crypto_shash) >> + crypto_free_shash(siw_crypto_shash); >> +out: >> + bus_unregister(&siw_bus); >> +out_nobus: >> + pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); >> + siw_cm_exit(); >> + >> + return rv; >> +} >> + >> + >> +static void __exit siw_exit_module(void) >> +{ >> + int nr_cpu; >> + >> + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { >> + if (qp_tx_thread[nr_cpu]) { >> + siw_stop_tx_thread(nr_cpu); >> + qp_tx_thread[nr_cpu] = NULL; >> + } >> + } >> + unregister_netdevice_notifier(&siw_netdev_nb); >> + >> + siw_cm_exit(); >> + >> + while (!list_empty(&siw_devlist)) { >> + struct siw_dev *sdev = >> + list_entry(siw_devlist.next, struct siw_dev, list); >> + list_del(&sdev->list); >> + if (sdev->is_registered) >> + siw_device_deregister(sdev); >> + >> + siw_device_destroy(sdev); >> + } >> + if (siw_crypto_shash) >> + crypto_free_shash(siw_crypto_shash); >> + >> + siw_debugfs_delete(); >> + >> + device_unregister(&siw_generic_dma_device); >> + >> + bus_unregister(&siw_bus); >> + >> + pr_info("SoftiWARP detached\n"); >> +} >> + >> +module_init(siw_init_module); >> +module_exit(siw_exit_module); >> -- >> 2.13.6 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe >linux-rdma" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > [attachment "signature.asc" removed by Bernard Metzler/Zurich/IBM] -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Nov 08, 2017 at 04:46:26PM +0000, Bernard Metzler wrote: > -----Leon Romanovsky <leon@kernel.org> wrote: ----- > > >> + */ > >> +static __init int siw_init_module(void) > >> +{ > >> + int rv; > >> + int nr_cpu; > >> + > >> + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { > >> + pr_info("siw: sendpage threshold too small: %u\n", > >> + (int)SENDPAGE_THRESH); > >> + rv = EINVAL; > >> + goto out; > >> + } > >> + /* > >> + * The xprtrdma module needs at least some rudimentary bus to set > >> + * some devices path MTU. > >> + */ > >> + rv = bus_register(&siw_bus); > > > >bus register for the driver? no way. > > I admit - this looks ugly. And the comment above is incomplete. > I did this mainly to allow siw to register with loopback devices. > During device registration, we need to present a real parent device: > device_add() needs it, as called by ib_device_register_sysfs(). > The loopback device does not have a parent device, nor dma_ops. > The dma_ops can be satisfied by global dma_virt_ops. But I did not > find a similar 'virtual parent device' I could present during registration. > So I 'invented' that one. > > I see no other way yet than dropping loopback device support, if this > approach is not acceptable. But, we found it nice to have, since performance > for the loopback case improved significantly. > > Any suggestion on how to enable loopback support (referencing 127.0.0.x > addresses during connection management) in a more elegant way would > be highly appreciated.... > RXE solved it by relying on loopback from netdevice, see rxe_dma_device call in rxe_register_device. Will it work for SIW? Thanks
-----Leon Romanovsky <leon@kernel.org> wrote: ----- >To: Bernard Metzler <BMT@zurich.ibm.com> >From: Leon Romanovsky <leon@kernel.org> >Date: 11/09/2017 09:52AM >Cc: linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Wed, Nov 08, 2017 at 04:46:26PM +0000, Bernard Metzler wrote: >> -----Leon Romanovsky <leon@kernel.org> wrote: ----- >> >> >> + */ >> >> +static __init int siw_init_module(void) >> >> +{ >> >> + int rv; >> >> + int nr_cpu; >> >> + >> >> + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { >> >> + pr_info("siw: sendpage threshold too small: %u\n", >> >> + (int)SENDPAGE_THRESH); >> >> + rv = EINVAL; >> >> + goto out; >> >> + } >> >> + /* >> >> + * The xprtrdma module needs at least some rudimentary bus to >set >> >> + * some devices path MTU. >> >> + */ >> >> + rv = bus_register(&siw_bus); >> > >> >bus register for the driver? no way. >> >> I admit - this looks ugly. And the comment above is incomplete. >> I did this mainly to allow siw to register with loopback devices. >> During device registration, we need to present a real parent >device: >> device_add() needs it, as called by ib_device_register_sysfs(). >> The loopback device does not have a parent device, nor dma_ops. >> The dma_ops can be satisfied by global dma_virt_ops. But I did not >> find a similar 'virtual parent device' I could present during >registration. >> So I 'invented' that one. >> >> I see no other way yet than dropping loopback device support, if >this >> approach is not acceptable. But, we found it nice to have, since >performance >> for the loopback case improved significantly. >> >> Any suggestion on how to enable loopback support (referencing >127.0.0.x >> addresses during connection management) in a more elegant way would >> be highly appreciated.... >> > >RXE solved it by relying on loopback from netdevice, see >rxe_dma_device call >in rxe_register_device. > >Will it work for SIW? 
> >Thanks > Right...this is basically what siw can do as well for all other devices it attaches to. But it will not work for the real loopback device (addr 127.0.0.1 and friends), since that device does not come with a parent device it belongs to. Loopback seems to be kind of a 'top level' device. rxe_dma_device() returns netdev->dev.parent, which is NULL for loopback. So there is probably currently no support for lo devices within rxe. I'll take the loopback device itself as the parent device. That seems to work. I'd suggest that for rxe as well. Thank you! -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Nov 09, 2017 at 02:43:41PM +0000, Bernard Metzler wrote: > > -----Leon Romanovsky <leon@kernel.org> wrote: ----- > > >To: Bernard Metzler <BMT@zurich.ibm.com> > >From: Leon Romanovsky <leon@kernel.org> > >Date: 11/09/2017 09:52AM > >Cc: linux-rdma@vger.kernel.org > >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network > >and RDMA subsystem > > > >On Wed, Nov 08, 2017 at 04:46:26PM +0000, Bernard Metzler wrote: > >> -----Leon Romanovsky <leon@kernel.org> wrote: ----- > >> > >> >> + */ > >> >> +static __init int siw_init_module(void) > >> >> +{ > >> >> + int rv; > >> >> + int nr_cpu; > >> >> + > >> >> + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { > >> >> + pr_info("siw: sendpage threshold too small: %u\n", > >> >> + (int)SENDPAGE_THRESH); > >> >> + rv = EINVAL; > >> >> + goto out; > >> >> + } > >> >> + /* > >> >> + * The xprtrdma module needs at least some rudimentary bus to > >set > >> >> + * some devices path MTU. > >> >> + */ > >> >> + rv = bus_register(&siw_bus); > >> > > >> >bus register for the driver? no way. > >> > >> I admit - this looks ugly. And the comment above is incomplete. > >> I did this mainly to allow siw to register with loopback devices. > >> During device registration, we need to present a real parent > >device: > >> device_add() needs it, as called by ib_device_register_sysfs(). > >> The loopback device does not have a parent device, nor dma_ops. > >> The dma_ops can be satisfied by global dma_virt_ops. But I did not > >> find a similar 'virtual parent device' I could present during > >registration. > >> So I 'invented' that one. > >> > >> I see no other way yet than dropping loopback device support, if > >this > >> approach is not acceptable. But, we found it nice to have, since > >performance > >> for the loopback case improved significantly. 
> >> > >> Any suggestion on how to enable loopback support (referencing > >127.0.0.x > >> addresses during connection management) in a more elegant way would > >> be highly appreciated.... > >> > > > >RXE solved it by relying on loopback from netdevice, see > >rxe_dma_device call > >in rxe_register_device. > > > >Will it work for SIW? > > > >Thanks > > > > Right...this is basically what siw can do as well for all > other devices it attaches to. But it will not work for the > real loopback device (addr 127.0.0.1 and friends), since that > device does not come with a parent device it belongs > to. Loopback seems to be kind of a 'top level' device. > > rxe_dma_device() returns netdev->dev.parent, which is > NULL for loopback. So there is probably currently no support > for lo devices within rxe. > > I'll take the loopback device itself as the parent device. > That seems to work. I'd suggest that for rxe as well. Feel free to send a patch for RXE. Thanks > > Thank you! >
-----Leon Romanovsky <leon@kernel.org> wrote: ----- >To: Bernard Metzler <BMT@zurich.ibm.com> >From: Leon Romanovsky <leon@kernel.org> >Date: 10/14/2017 08:41AM >Cc: linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Sat, Oct 14, 2017 at 01:28:43AM +0000, Bernard Metzler wrote: >> -----Leon Romanovsky <leon@kernel.org> wrote: ----- >> >> >> + >> >> +#define SIW_MAX_IF 12 >> >> +static int if_cnt; >> >> +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = >> >'\0'}; >> >> +module_param_array(iface_list, charp, &if_cnt, 0444); >> >> +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if >> >present"); >> >> + >> >> +static bool loopback_enabled = 1; >> >> +module_param(loopback_enabled, bool, 0644); >> >> +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); >> >> + >> >> +LIST_HEAD(siw_devlist); >> >> + >> >> +static int cpu_cnt; >> >> +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = >'\0'}; >> >> +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); >> >> +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall >be >> >bound to"); >> > >> >No module parameters please. >> OK. Would you have a pointer which sheds some light on that >> rule? Thank you! >> > >It is not rule, but common knowledge exactly as BUG_ON which is not >prohibited, but makes no sense in low level driver code. > >The module parameters sometimes make sense, for example in subsystem >level where they can apply to whole devices underneath. > >But most of the time, they indicate complete ignorance of users in >favor >of easy developer's life. > >For people, like me, who doesn't run modules at all, the change in >module parameters require rebuild of initramfs and in more extreme >cases rebuild of SELinux labels. > I will try to finish up a new patch set for siw over Christmas holidays. 
One question I have - what is the recommended way of replacing those module parameters, if we need to flexibly parameterize things from user land? For the rxe driver, I see the usage of module_param_cb, but this probably is not what we want in the long run? Would a sysctl be appropriate, or a char device (we may need to pass strings like interface names), or configfs? Thanks very much! Bernard. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > > -----Leon Romanovsky <leon@kernel.org> wrote: ----- > > >To: Bernard Metzler <BMT@zurich.ibm.com> > >From: Leon Romanovsky <leon@kernel.org> > >Date: 10/14/2017 08:41AM > >Cc: linux-rdma@vger.kernel.org > >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network > >and RDMA subsystem > > > >On Sat, Oct 14, 2017 at 01:28:43AM +0000, Bernard Metzler wrote: > >> -----Leon Romanovsky <leon@kernel.org> wrote: ----- > >> > >> >> + > >> >> +#define SIW_MAX_IF 12 > >> >> +static int if_cnt; > >> >> +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = > >> >'\0'}; > >> >> +module_param_array(iface_list, charp, &if_cnt, 0444); > >> >> +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if > >> >present"); > >> >> + > >> >> +static bool loopback_enabled = 1; > >> >> +module_param(loopback_enabled, bool, 0644); > >> >> +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); > >> >> + > >> >> +LIST_HEAD(siw_devlist); > >> >> + > >> >> +static int cpu_cnt; > >> >> +static char *tx_cpu_list[MAX_CPU] = {[0 ... (MAX_CPU-1)] = > >'\0'}; > >> >> +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); > >> >> +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall > >be > >> >bound to"); > >> > > >> >No module parameters please. > >> OK. Would you have a pointer which sheds some light on that > >> rule? Thank you! > >> > > > >It is not rule, but common knowledge exactly as BUG_ON which is not > >prohibited, but makes no sense in low level driver code. > > > >The module parameters sometimes make sense, for example in subsystem > >level where they can apply to whole devices underneath. > > > >But most of the time, they indicate complete ignorance of users in > >favor > >of easy developer's life. > > > >For people, like me, who doesn't run modules at all, the change in > >module parameters require rebuild of initramfs and in more extreme > >cases rebuild of SELinux labels. 
> > > > I will try to finish up a new patch set for siw over > Christmas holidays. > > One question I have - what is the recommended way of replacing > those module parameters, if we need to flexibly parameterize > things from user land? For the rxe driver, I see the usage of > module_param_cb, but this probably is not what we want in the > long run? Would a sysctl be appropriate, or a char device > (we may need to pass strings like interface names), or configfs? It looks like "ip" tool can be good fit, or teach ib/core to accept parameters from rdmatool before actual device exists. Thanks > > Thanks very much! > Bernard. >
On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > One question I have - what is the recommended way of replacing > those module parameters, if we need to flexibly parameterize > things from user land? For the rxe driver, I see the usage of > module_param_cb, but this probably is not what we want in the > long run? Would a sysctl be appropriate, or a char device > (we may need to pass strings like interface names), or configfs? Probably something like 'rdmatool interface attach' eg cause it to create using the RDMA netlink protocol. Leon? I'd like to see the same for rxe as it solves module autoloading problems. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jan 02, 2018 at 02:37:06PM -0700, Jason Gunthorpe wrote: > On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > > > One question I have - what is the recommended way of replacing > > those module parameters, if we need to flexibly parameterize > > things from user land? For the rxe driver, I see the usage of > > module_param_cb, but this probably is not what we want in the > > long run? Would a sysctl be appropriate, or a char device > > (we may need to pass strings like interface names), or configfs? > > Probably something like 'rdmatool interface attach' eg cause it to > create using the RDMA netlink protocol. > > Leon? https://www.spinics.net/lists/linux-rdma/msg58498.html "It looks like "ip" tool can be good fit ..." ip link add ... ip link set ... > > I'd like to see the same for rxe as it solves module autoloading > problems. +1 > > Jason
On Wed, Jan 03, 2018 at 07:25:29AM +0200, Leon Romanovsky wrote: > On Tue, Jan 02, 2018 at 02:37:06PM -0700, Jason Gunthorpe wrote: > > On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > > > > > One question I have - what is the recommended way of replacing > > > those module parameters, if we need to flexibly parameterize > > > things from user land? For the rxe driver, I see the usage of > > > module_param_cb, but this probably is not what we want in the > > > long run? Would a sysctl be appropriate, or a char device > > > (we may need to pass strings like interface names), or configfs? > > > > Probably something like 'rdmatool interface attach' eg cause it to > > create using the RDMA netlink protocol. > > > > Leon? > > https://www.spinics.net/lists/linux-rdma/msg58498.html > "It looks like "ip" tool can be good fit ..." > > ip link add ... > ip link set ... ip link adds netdevs, so that doesn't quite feel right for adding rdma devices??? Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 03, 2018 at 08:52:25AM -0700, Jason Gunthorpe wrote: > On Wed, Jan 03, 2018 at 07:25:29AM +0200, Leon Romanovsky wrote: > > On Tue, Jan 02, 2018 at 02:37:06PM -0700, Jason Gunthorpe wrote: > > > On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > > > > > > > One question I have - what is the recommended way of replacing > > > > those module parameters, if we need to flexibly parameterize > > > > things from user land? For the rxe driver, I see the usage of > > > > module_param_cb, but this probably is not what we want in the > > > > long run? Would a sysctl be appropriate, or a char device > > > > (we may need to pass strings like interface names), or configfs? > > > > > > Probably something like 'rdmatool interface attach' eg cause it to > > > create using the RDMA netlink protocol. > > > > > > Leon? > > > > https://www.spinics.net/lists/linux-rdma/msg58498.html > > "It looks like "ip" tool can be good fit ..." > > > > ip link add ... > > ip link set ... > > ip link adds netdevs, so that doesn't quite feel right for adding rdma > devices??? It depends on the add/delete order, currently we are adding RDMA (RXE/SoftiWARP) device to existing netdev. Will it make sense to reverse order and add/delete netdev device for the existing RDMA (RXE/SoftiWARP) device? In this "reverse order", ip tool can be right tool. Thanks > > Jason
On Wed, Jan 03, 2018 at 07:31:05PM +0200, Leon Romanovsky wrote: > On Wed, Jan 03, 2018 at 08:52:25AM -0700, Jason Gunthorpe wrote: > > On Wed, Jan 03, 2018 at 07:25:29AM +0200, Leon Romanovsky wrote: > > > On Tue, Jan 02, 2018 at 02:37:06PM -0700, Jason Gunthorpe wrote: > > > > On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: > > > > > > > > > One question I have - what is the recommended way of replacing > > > > > those module parameters, if we need to flexibly parameterize > > > > > things from user land? For the rxe driver, I see the usage of > > > > > module_param_cb, but this probably is not what we want in the > > > > > long run? Would a sysctl be appropriate, or a char device > > > > > (we may need to pass strings like interface names), or configfs? > > > > > > > > Probably something like 'rdmatool interface attach' eg cause it to > > > > create using the RDMA netlink protocol. > > > > > > > > Leon? > > > > > > https://www.spinics.net/lists/linux-rdma/msg58498.html > > > "It looks like "ip" tool can be good fit ..." > > > > > > ip link add ... > > > ip link set ... > > > > ip link adds netdevs, so that doesn't quite feel right for adding rdma > > devices??? > > It depends on the add/delete order, currently we are adding > RDMA (RXE/SoftiWARP) device to existing netdev. > Will it make sense to reverse order and add/delete netdev device for > the existing RDMA (RXE/SoftiWARP) device? No. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
--- Bernard Metzler, PhD Tech. Leader High Performance I/O, Principal Research Staff IBM Zurich Research Laboratory Saeumerstrasse 4 CH-8803 Rueschlikon, Switzerland +41 44 724 8605 -----Jason Gunthorpe <jgg@ziepe.ca> wrote: ----- >To: Leon Romanovsky <leon@kernel.org> >From: Jason Gunthorpe <jgg@ziepe.ca> >Date: 01/03/2018 06:37PM >Cc: Bernard Metzler <BMT@zurich.ibm.com>, linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Wed, Jan 03, 2018 at 07:31:05PM +0200, Leon Romanovsky wrote: >> On Wed, Jan 03, 2018 at 08:52:25AM -0700, Jason Gunthorpe wrote: >> > On Wed, Jan 03, 2018 at 07:25:29AM +0200, Leon Romanovsky wrote: >> > > On Tue, Jan 02, 2018 at 02:37:06PM -0700, Jason Gunthorpe wrote: >> > > > On Fri, Dec 22, 2017 at 11:29:38AM +0000, Bernard Metzler wrote: >> > > > >> > > > > One question I have - what is the recommended way of replacing >> > > > > those module parameters, if we need to flexibly parameterize >> > > > > things from user land? For the rxe driver, I see the usage of >> > > > > module_param_cb, but this probably is not what we want in the >> > > > > long run? Would a sysctl be appropriate, or a char device >> > > > > (we may need to pass strings like interface names), or configfs? >> > > > >> > > > Probably something like 'rdmatool interface attach' eg cause it to >> > > > create using the RDMA netlink protocol. >> > > > >> > > > Leon? >> > > >> > > https://www.spinics.net/lists/linux-rdma/msg58498.html >> > > "It looks like "ip" tool can be good fit ..." >> > > >> > > ip link add ... >> > > ip link set ... >> > >> > ip link adds netdevs, so that doesn't quite feel right for adding rdma >> > devices???
>> >> It depends on the add/delete order, currently we are adding >> RDMA (RXE/SoftiWARP) device to existing netdev. > >> Will it make sense to reverse order and add/delete netdev device for >> the existing RDMA (RXE/SoftiWARP) device? > > No. > > Jason > What about potentially enabling it on all interfaces which are used by TCP? iWARP is a ULP to TCP... Exceptions might be interfaces which implement their own RDMA protocol - like iWARP adapters, or IPoIB devices, or interfaces which are already in use by SoftRoCE. Thanks, Bernard. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 04, 2018 at 03:05:52PM +0000, Bernard Metzler wrote: > What about potentially enabling it on all interfaces which are used > by TCP? iWARP is a ULP to TCP... Exceptions might be interfaces which > implement their own RDMA protocol - like iWARP adapters, or IPoIB > devices, or interfaces which are already in use by SoftRoCE. Since soft iwarp is likely to start out as a huge security risk, it should be opted into. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
-----Jason Gunthorpe <jgg@ziepe.ca> wrote: ----- >To: Bernard Metzler <BMT@zurich.ibm.com> >From: Jason Gunthorpe <jgg@ziepe.ca> >Date: 01/04/2018 07:21PM >Cc: Leon Romanovsky <leon@kernel.org>, linux-rdma@vger.kernel.org >Subject: Re: [PATCH v2 03/13] Attach/detach SoftiWarp to/from network >and RDMA subsystem > >On Thu, Jan 04, 2018 at 03:05:52PM +0000, Bernard Metzler wrote: > >> What about potentially enabling it on all interfaces which are used >> by TCP? iWARP is a ULP to TCP... Exceptions might be interfaces >which >> implement their own RDMA protocol - like iWARP adapters, or IPoIB >> devices, or interfaces which are already in use by SoftRoCE. > >Since soft iwarp is likely to start out as a huge security risk, it >should be opted into. > >Jason > > Right, security will likely be one of the debates..! Can't parse that above - are you suggesting only explicitly enabling softiwarp on a per interface basis? Thanks! Bernard. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Can't parse that above - are you suggesting only explicitly enabling softiwarp > on a per interface basis? Right. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c new file mode 100644 index 000000000000..5a054c6becaa --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -0,0 +1,752 @@ +/* + * Software iWARP device driver for Linux + * + * Authors: Bernard Metzler <bmt@zurich.ibm.com> + * + * Copyright (c) 2008-2017, IBM Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of IBM nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <net/net_namespace.h> +#include <linux/rtnetlink.h> +#include <linux/if_arp.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/dma-mapping.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_obj.h" +#include "siw_cm.h" +#include "siw_verbs.h" +#include <linux/kthread.h> + + +MODULE_AUTHOR("Bernard Metzler"); +MODULE_DESCRIPTION("Software iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.2"); + +#define SIW_MAX_IF 12 +static int if_cnt; +static char *iface_list[SIW_MAX_IF] = {[0 ... (SIW_MAX_IF-1)] = '\0'}; +module_param_array(iface_list, charp, &if_cnt, 0444); +MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if present"); + +static bool loopback_enabled = 1; +module_param(loopback_enabled, bool, 0644); +MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); + +LIST_HEAD(siw_devlist); + +static int cpu_cnt; +static char *tx_cpu_list[MAX_CPU] = {[0 ... 
(MAX_CPU-1)] = '\0'}; +module_param_array(tx_cpu_list, charp, &cpu_cnt, 0444); +MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be bound to"); + +int default_tx_cpu = -1; +struct task_struct *qp_tx_thread[MAX_CPU]; +struct crypto_shash *siw_crypto_shash; + +static ssize_t show_sw_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct siw_dev *sdev = container_of(dev, struct siw_dev, ofa_dev.dev); + + return sprintf(buf, "%x\n", sdev->attrs.version); +} + +static DEVICE_ATTR(sw_version, 0444, show_sw_version, NULL); + +static struct device_attribute *siw_dev_attributes[] = { + &dev_attr_sw_version +}; + +static void siw_device_release(struct device *dev) +{ + pr_info("%s device released\n", dev_name(dev)); +} + +static struct device siw_generic_dma_device = { + .dma_ops = &siw_dma_generic_ops, + .init_name = "software-rdma-v2", + .release = siw_device_release +}; + +static struct bus_type siw_bus = { + .name = "siw", +}; + +static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int mask, + struct ib_port_modify *props) +{ + return -EOPNOTSUPP; +} + + +static void siw_device_register(struct siw_dev *sdev) +{ + struct ib_device *ofa_dev = &sdev->ofa_dev; + int rv, i; + static int dev_id = 1; + + rv = ib_register_device(ofa_dev, NULL); + if (rv) { + dprint(DBG_DM|DBG_ON, " %s: ib register error: rv=%d\n", + ofa_dev->name, rv); + return; + } + + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { + rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); + if (rv) { + dprint(DBG_DM|DBG_ON, " %s: create file error: rv=%d\n", + ofa_dev->name, rv); + ib_unregister_device(ofa_dev); + return; + } + } + siw_debugfs_add_device(sdev); + + sdev->attrs.vendor_part_id = dev_id++; + + dprint(DBG_DM, ": '%s' at '%s', HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", + ofa_dev->name, sdev->netdev->name, + *(u8 *)sdev->netdev->dev_addr, + *((u8 *)sdev->netdev->dev_addr + 1), + *((u8 *)sdev->netdev->dev_addr + 2), + *((u8 
*)sdev->netdev->dev_addr + 3), + *((u8 *)sdev->netdev->dev_addr + 4), + *((u8 *)sdev->netdev->dev_addr + 5)); + + sdev->is_registered = 1; +} + +static void siw_device_deregister(struct siw_dev *sdev) +{ + int i; + + siw_debugfs_del_device(sdev); + + if (sdev->is_registered) { + + dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, + sdev->netdev->name); + + for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) + device_remove_file(&sdev->ofa_dev.dev, + siw_dev_attributes[i]); + + ib_unregister_device(&sdev->ofa_dev); + } + if (atomic_read(&sdev->num_ctx) || atomic_read(&sdev->num_srq) || + atomic_read(&sdev->num_mem) || atomic_read(&sdev->num_cep) || + atomic_read(&sdev->num_qp) || atomic_read(&sdev->num_cq) || + atomic_read(&sdev->num_pd)) { + pr_warn("SIW at %s: orphaned resources!\n", sdev->netdev->name); + pr_warn("CTX %d, SRQ %d, QP %d, CQ %d, MEM %d, CEP %d, PD %d\n", + atomic_read(&sdev->num_ctx), + atomic_read(&sdev->num_srq), + atomic_read(&sdev->num_qp), + atomic_read(&sdev->num_cq), + atomic_read(&sdev->num_mem), + atomic_read(&sdev->num_cep), + atomic_read(&sdev->num_pd)); + } + i = 0; + + while (!list_empty(&sdev->cep_list)) { + struct siw_cep *cep = list_entry(sdev->cep_list.next, + struct siw_cep, devq); + list_del(&cep->devq); + dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", + cep, cep->state); + kfree(cep); + i++; + } + if (i) + pr_warn("%s: free'd %d CEPs\n", __func__, i); + + sdev->is_registered = 0; +} + +static void siw_device_destroy(struct siw_dev *sdev) +{ + dprint(DBG_DM, ": destroy siw device at %s\n", sdev->netdev->name); + + siw_idr_release(sdev); + kfree(sdev->ofa_dev.iwcm); + dev_put(sdev->netdev); + ib_dealloc_device(&sdev->ofa_dev); +} + + +static int siw_match_iflist(struct net_device *dev) +{ + int i; + + if (if_cnt == 0) + return 1; + + if_cnt = min_t(int, SIW_MAX_IF, if_cnt); + + for (i = 0; i < if_cnt; i++) + if (!strcmp(iface_list[i], dev->name)) + return 1; + return 0; +} + +static struct siw_dev 
*siw_dev_from_netdev(struct net_device *dev) +{ + if (!list_empty(&siw_devlist)) { + struct list_head *pos; + + list_for_each(pos, &siw_devlist) { + struct siw_dev *sdev = + list_entry(pos, struct siw_dev, list); + if (sdev->netdev == dev) + return sdev; + } + } + return NULL; +} + +static int siw_tx_qualified(int cpu) +{ + int i; + + if (cpu_cnt == 0) + return 1; + + for (i = 0; i < cpu_cnt; i++) { + int new_cpu; + + if (kstrtoint(tx_cpu_list[i], 0, &new_cpu)) + continue; + if (cpu == new_cpu) + return 1; + } + return 0; +} + +static int siw_create_tx_threads(int max_threads, int check_qualified) +{ + int cpu, rv, assigned = 0; + + if (max_threads < 0 || max_threads > MAX_CPU) + return 0; + + for_each_online_cpu(cpu) { + if (siw_tx_qualified(cpu)) { + qp_tx_thread[cpu] = + kthread_create(siw_run_sq, + (unsigned long *)(long)cpu, + "qp_tx_thread/%d", cpu); + kthread_bind(qp_tx_thread[cpu], cpu); + if (IS_ERR(qp_tx_thread)) { + rv = PTR_ERR(qp_tx_thread); + qp_tx_thread[cpu] = NULL; + pr_info("Binding TX thread to CPU %d failed", + cpu); + break; + } + wake_up_process(qp_tx_thread[cpu]); + assigned++; + if (default_tx_cpu < 0) + default_tx_cpu = cpu; + if (assigned >= max_threads) + break; + } + } + return assigned; +} + +static int siw_dev_qualified(struct net_device *netdev) +{ + if (!siw_match_iflist(netdev)) { + dprint(DBG_DM, ": %s (not selected)\n", + netdev->name); + return 0; + } + /* + * Additional hardware support can be added here + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see + * <linux/if_arp.h> for type identifiers. 
+ */ + if (netdev->type == ARPHRD_ETHER || + netdev->type == ARPHRD_IEEE802 || + netdev->type == ARPHRD_INFINIBAND || + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) + return 1; + + return 0; +} + +static void siw_verbs_sq_flush(struct ib_qp *ofa_qp) +{ + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); + + down_write(&qp->state_lock); + siw_sq_flush(qp); + up_write(&qp->state_lock); +} + +static void siw_verbs_rq_flush(struct ib_qp *ofa_qp) +{ + struct siw_qp *qp = siw_qp_ofa2siw(ofa_qp); + + down_write(&qp->state_lock); + siw_rq_flush(qp); + up_write(&qp->state_lock); +} + +static struct ib_ah *siw_create_ah(struct ib_pd *pd, struct rdma_ah_attr *attr, + struct ib_udata *udata) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static int siw_destroy_ah(struct ib_ah *ah) +{ + return -EOPNOTSUPP; +} + + +static struct siw_dev *siw_device_create(struct net_device *netdev) +{ + struct siw_dev *sdev = (struct siw_dev *)ib_alloc_device(sizeof(*sdev)); + struct ib_device *ofa_dev; + + if (!sdev) + goto out; + + ofa_dev = &sdev->ofa_dev; + + ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!ofa_dev->iwcm) { + ib_dealloc_device(ofa_dev); + sdev = NULL; + goto out; + } + + sdev->netdev = netdev; + list_add_tail(&sdev->list, &siw_devlist); + + strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); + strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, + IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); + + memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); + if (netdev->type != ARPHRD_LOOPBACK) + memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); + else { + /* + * The loopback device does not have a HW address, + * but connection mangagement lib expects gid != 0 + */ + size_t gidlen = min_t(size_t, strlen(ofa_dev->name), 6); + + memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); + } + ofa_dev->owner = THIS_MODULE; + + ofa_dev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << 
IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + + ofa_dev->node_type = RDMA_NODE_RNIC; + memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, + sizeof(SIW_NODE_DESC_COMMON)); + + /* + * Current model (one-to-one device association): + * One Softiwarp device per net_device or, equivalently, + * per physical port. 
+ */ + ofa_dev->phys_port_cnt = 1; + + ofa_dev->num_comp_vectors = num_possible_cpus(); + ofa_dev->dev.parent = &siw_generic_dma_device; + ofa_dev->query_device = siw_query_device; + ofa_dev->query_port = siw_query_port; + ofa_dev->get_port_immutable = siw_get_port_immutable; + ofa_dev->query_qp = siw_query_qp; + ofa_dev->modify_port = siw_modify_port; + ofa_dev->query_pkey = siw_query_pkey; + ofa_dev->query_gid = siw_query_gid; + ofa_dev->alloc_ucontext = siw_alloc_ucontext; + ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; + ofa_dev->mmap = siw_mmap; + ofa_dev->alloc_pd = siw_alloc_pd; + ofa_dev->dealloc_pd = siw_dealloc_pd; + ofa_dev->create_ah = siw_create_ah; + ofa_dev->destroy_ah = siw_destroy_ah; + ofa_dev->create_qp = siw_create_qp; + ofa_dev->modify_qp = siw_verbs_modify_qp; + ofa_dev->destroy_qp = siw_destroy_qp; + ofa_dev->create_cq = siw_create_cq; + ofa_dev->destroy_cq = siw_destroy_cq; + ofa_dev->resize_cq = NULL; + ofa_dev->poll_cq = siw_poll_cq; + ofa_dev->get_dma_mr = siw_get_dma_mr; + ofa_dev->reg_user_mr = siw_reg_user_mr; + ofa_dev->dereg_mr = siw_dereg_mr; + ofa_dev->alloc_mr = siw_alloc_mr; + ofa_dev->map_mr_sg = siw_map_mr_sg; + ofa_dev->dealloc_mw = NULL; + + ofa_dev->create_srq = siw_create_srq; + ofa_dev->modify_srq = siw_modify_srq; + ofa_dev->query_srq = siw_query_srq; + ofa_dev->destroy_srq = siw_destroy_srq; + ofa_dev->post_srq_recv = siw_post_srq_recv; + + ofa_dev->attach_mcast = NULL; + ofa_dev->detach_mcast = NULL; + ofa_dev->process_mad = siw_no_mad; + + ofa_dev->req_notify_cq = siw_req_notify_cq; + ofa_dev->post_send = siw_post_send; + ofa_dev->post_recv = siw_post_receive; + + ofa_dev->drain_sq = siw_verbs_sq_flush; + ofa_dev->drain_rq = siw_verbs_rq_flush; + + ofa_dev->dev.dma_ops = &dma_virt_ops; + + ofa_dev->iwcm->connect = siw_connect; + ofa_dev->iwcm->accept = siw_accept; + ofa_dev->iwcm->reject = siw_reject; + ofa_dev->iwcm->create_listen = siw_create_listen; + ofa_dev->iwcm->destroy_listen = siw_destroy_listen; + 
ofa_dev->iwcm->add_ref = siw_qp_get_ref; + ofa_dev->iwcm->rem_ref = siw_qp_put_ref; + ofa_dev->iwcm->get_qp = siw_get_ofaqp; + + sdev->attrs.version = VERSION_ID_SOFTIWARP; + sdev->attrs.vendor_id = SIW_VENDOR_ID; + sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; + sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; + sdev->attrs.max_qp = SIW_MAX_QP; + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; + sdev->attrs.max_ord = SIW_MAX_ORD_QP; + sdev->attrs.max_ird = SIW_MAX_IRD_QP; + sdev->attrs.cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; + sdev->attrs.max_sge = SIW_MAX_SGE; + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; + sdev->attrs.max_cq = SIW_MAX_CQ; + sdev->attrs.max_cqe = SIW_MAX_CQE; + sdev->attrs.max_mr = SIW_MAX_MR; + sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); + sdev->attrs.max_pd = SIW_MAX_PD; + sdev->attrs.max_mw = SIW_MAX_MW; + sdev->attrs.max_fmr = SIW_MAX_FMR; + sdev->attrs.max_srq = SIW_MAX_SRQ; + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; + sdev->attrs.max_srq_sge = SIW_MAX_SGE; + + siw_idr_init(sdev); + INIT_LIST_HEAD(&sdev->cep_list); + INIT_LIST_HEAD(&sdev->qp_list); + + atomic_set(&sdev->num_ctx, 0); + atomic_set(&sdev->num_srq, 0); + atomic_set(&sdev->num_qp, 0); + atomic_set(&sdev->num_cq, 0); + atomic_set(&sdev->num_mem, 0); + atomic_set(&sdev->num_pd, 0); + atomic_set(&sdev->num_cep, 0); + + sdev->is_registered = 0; +out: + if (sdev) + dev_hold(netdev); + + return sdev; +} + + + +static int siw_netdev_event(struct notifier_block *nb, unsigned long event, + void *arg) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(arg); + struct in_device *in_dev; + struct siw_dev *sdev; + + dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); + + if (dev_net(netdev) != &init_net) + goto done; + + sdev = siw_dev_from_netdev(netdev); + + switch (event) { + + case NETDEV_UP: + if (!sdev) + break; + + if (sdev->is_registered) { + sdev->state = IB_PORT_ACTIVE; + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); + break; + } + + in_dev = in_dev_get(netdev); + 
if (!in_dev) { + dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); + sdev->state = IB_PORT_INIT; + break; + } + + if (in_dev->ifa_list) { + sdev->state = IB_PORT_ACTIVE; + siw_device_register(sdev); + } else { + dprint(DBG_DM, ": %s: no ifa\n", netdev->name); + sdev->state = IB_PORT_INIT; + } + in_dev_put(in_dev); + + break; + + case NETDEV_DOWN: + if (sdev && sdev->is_registered) { + sdev->state = IB_PORT_DOWN; + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); + break; + } + break; + + case NETDEV_REGISTER: + if (!sdev) { + if (!siw_dev_qualified(netdev)) + break; + + sdev = siw_device_create(netdev); + if (sdev) { + sdev->state = IB_PORT_INIT; + dprint(DBG_DM, ": new siw device for %s\n", + netdev->name); + } + } + break; + + case NETDEV_UNREGISTER: + if (sdev) { + if (sdev->is_registered) + siw_device_deregister(sdev); + list_del(&sdev->list); + siw_device_destroy(sdev); + } + break; + + case NETDEV_CHANGEADDR: + if (sdev->is_registered) + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); + + break; + /* + * Todo: Below netdev events are currently not handled. + */ + case NETDEV_CHANGEMTU: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGE: + + break; + + default: + break; + } +done: + return NOTIFY_OK; +} + +static struct notifier_block siw_netdev_nb = { + .notifier_call = siw_netdev_event, +}; + +/* + * siw_init_module - Initialize Softiwarp module and register with netdev + * subsystem to create Softiwarp devices per net_device + */ +static __init int siw_init_module(void) +{ + int rv; + int nr_cpu; + + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { + pr_info("siw: sendpage threshold too small: %u\n", + (int)SENDPAGE_THRESH); + rv = EINVAL; + goto out; + } + /* + * The xprtrdma module needs at least some rudimentary bus to set + * some devices path MTU. 
+ */ + rv = bus_register(&siw_bus); + if (rv) + goto out_nobus; + + siw_generic_dma_device.bus = &siw_bus; + + rv = device_register(&siw_generic_dma_device); + if (rv) + goto out; + + rv = siw_cm_init(); + if (rv) + goto out_unregister; + + if (DPRINT_MASK) + siw_debug_init(); + + /* + * Allocate CRC SHASH object. Fail loading siw only, if CRC is + * required by kernel module + */ + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(siw_crypto_shash)) { + pr_info("siw: Loading CRC32c failed: %ld\n", + PTR_ERR(siw_crypto_shash)); + siw_crypto_shash = NULL; + if (mpa_crc_required == true) + goto out_unregister; + } + rv = register_netdevice_notifier(&siw_netdev_nb); + if (rv) { + siw_debugfs_delete(); + goto out_unregister; + } + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) + qp_tx_thread[nr_cpu] = NULL; + + if (siw_create_tx_threads(MAX_CPU, 1) == 0) { + pr_info("Try starting default TX thread\n"); + if (siw_create_tx_threads(1, 0) == 0) { + pr_info("Could not start any TX thread\n"); + goto out_unregister; + } + } + pr_info("SoftiWARP attached\n"); + return 0; + +out_unregister: + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { + if (qp_tx_thread[nr_cpu]) { + siw_stop_tx_thread(nr_cpu); + qp_tx_thread[nr_cpu] = NULL; + } + } + device_unregister(&siw_generic_dma_device); + + if (siw_crypto_shash) + crypto_free_shash(siw_crypto_shash); +out: + bus_unregister(&siw_bus); +out_nobus: + pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); + siw_cm_exit(); + + return rv; +} + + +static void __exit siw_exit_module(void) +{ + int nr_cpu; + + for (nr_cpu = 0; nr_cpu < MAX_CPU; nr_cpu++) { + if (qp_tx_thread[nr_cpu]) { + siw_stop_tx_thread(nr_cpu); + qp_tx_thread[nr_cpu] = NULL; + } + } + unregister_netdevice_notifier(&siw_netdev_nb); + + siw_cm_exit(); + + while (!list_empty(&siw_devlist)) { + struct siw_dev *sdev = + list_entry(siw_devlist.next, struct siw_dev, list); + list_del(&sdev->list); + if (sdev->is_registered) + siw_device_deregister(sdev); + + siw_device_destroy(sdev); + } + if (siw_crypto_shash) + crypto_free_shash(siw_crypto_shash); + + siw_debugfs_delete(); + + device_unregister(&siw_generic_dma_device); + + bus_unregister(&siw_bus); + + pr_info("SoftiWARP detached\n"); +} + +module_init(siw_init_module); +module_exit(siw_exit_module);
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> --- drivers/infiniband/sw/siw/siw_main.c | 752 +++++++++++++++++++++++++++++++++++ 1 file changed, 752 insertions(+) create mode 100644 drivers/infiniband/sw/siw/siw_main.c