From patchwork Fri Jul 1 13:18:55 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Robert Pearson X-Patchwork-Id: 938972 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.4) with ESMTP id p61LTcRv014702 for ; Fri, 1 Jul 2011 21:51:06 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757455Ab1GAVOM (ORCPT ); Fri, 1 Jul 2011 17:14:12 -0400 Received: from cdptpa-bc-oedgelb.mail.rr.com ([75.180.133.32]:58949 "EHLO cdptpa-bc-oedgelb.mail.rr.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757417Ab1GAVOL (ORCPT ); Fri, 1 Jul 2011 17:14:11 -0400 X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Fri, 01 Jul 2011 21:51:06 +0000 (UTC) X-Greylist: delayed 300 seconds by postgrey-1.27 at vger.kernel.org; Fri, 01 Jul 2011 17:13:31 EDT Received: from cdptpa-bc-oedgelb.mail.rr.com ([10.127.134.102]) by cdptpa-bc-qmta02.mail.rr.com with ESMTP id <20110701211004547.IHBC4903@cdptpa-bc-qmta02.mail.rr.com> for ; Fri, 1 Jul 2011 21:10:04 +0000 Authentication-Results: cdptpa-bc-oedgelb.mail.rr.com smtp.user=fzago@systemfabricworks.com; auth=pass (PLAIN) X-Authority-Analysis: v=1.1 cv=QcSFu2tMqX8VyBnwf4xZriMeG3TVj1s8v1Rcea0EwGI= c=1 sm=0 a=hAzdGUM1iB0A:10 a=zUiNiVfTjo4A:10 a=ozIaqLvjkoIA:10 a=DCwX0kaxZCiV3mmbfDr8nQ==:17 a=YORvzBCaAAAA:8 a=bC7xisPkAAAA:8 a=XmB_V907SRRk-sK5Lw4A:9 a=gkoZ4ChU69F2GmtrMcIA:7 a=QLxd5cu_Zb8A:10 a=VV2__AUApEoA:10 a=DCwX0kaxZCiV3mmbfDr8nQ==:117 X-Cloudmark-Score: 0 X-Originating-IP: 67.79.195.91 Received: from [67.79.195.91] ([67.79.195.91:36997] helo=[10.0.2.91]) by cdptpa-bc-oedge01.mail.rr.com (envelope-from ) (ecelerity 2.2.3.46 r()) with ESMTPA id 14/2C-18213-5673E0E4; Fri, 01 Jul 2011 21:08:53 +0000 Message-Id: <20110701132202.342196794@systemfabricworks.com> References: 
<20110701131821.928693424@systemfabricworks.com> User-Agent: quilt/0.46-1 Date: Fri, 01 Jul 2011 08:18:55 -0500 From: rpearson@systemfabricworks.com To: linux-rdma@vger.kernel.org Cc: Bob Pearson Subject: [patch 34/44] rxe_arbiter.c Content-Disposition: inline; filename=patch34 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org packet output arbitration. Signed-off-by: Bob Pearson --- drivers/infiniband/hw/rxe/rxe_arbiter.c | 192 ++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) Index: infiniband/drivers/infiniband/hw/rxe/rxe_arbiter.c =================================================================== --- /dev/null +++ infiniband/drivers/infiniband/hw/rxe/rxe_arbiter.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2009-2011 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2009-2011 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+
+#include "rxe.h"
+#include "rxe_loc.h"
+#include "rxe_qp.h"
+
+/*
+ * Settle the outstanding-skb accounting for a packet the arbiter has
+ * finished with.
+ *
+ * @rxe:        device; its req_skb_out or resp_skb_out counter is decremented
+ * @qp:         queue pair; its counter of the same name is decremented
+ * @is_request: packet mask; the RXE_REQ_MASK bit selects request
+ *              accounting, otherwise the packet is counted as a response
+ *
+ * On the request side, a qp that has need_req_skb set and now has fewer
+ * than rxe_max_skb_per_qp request skbs outstanding gets its req task run
+ * through rxe_run_task() with the schedule argument set to 1.
+ */
+static inline void account_skb(struct rxe_dev *rxe, struct rxe_qp *qp,
+			       int is_request)
+{
+	if (is_request & RXE_REQ_MASK) {
+		atomic_dec(&rxe->req_skb_out);
+		atomic_dec(&qp->req_skb_out);
+		if (qp->need_req_skb &&
+		    atomic_read(&qp->req_skb_out) < rxe_max_skb_per_qp)
+			rxe_run_task(&qp->req.task, 1);
+	} else {
+		atomic_dec(&rxe->resp_skb_out);
+		atomic_dec(&qp->resp_skb_out);
+	}
+}
+
+/*
+ * Transmit (or drop) a single packet on behalf of a qp, applying the
+ * arbiter's static rate control.
+ *
+ * @rxe: device to send on
+ * @qp:  queue pair the packet belongs to
+ * @skb: packet to send
+ *
+ * Returns 0 when the packet was sent without error or was dropped because
+ * the qp is not valid / not in QP_STATE_READY for its direction.
+ * Returns RXE_QUEUE_STOPPED when the interface reports that value; in that
+ * case no accounting is done and the skb is not freed here, so the caller
+ * can retry the same packet.
+ * Any other non-zero value is the error returned by the interface op; it is
+ * counted in rxe->xmit_errors and the packet is accounted as finished.
+ */
+static int xmit_one_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
+			   struct sk_buff *skb)
+{
+	int err;
+	struct timespec time;
+	long new_delay;
+	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+	int is_request = pkt->mask & RXE_REQ_MASK;
+
+	/* drop pkt if qp is in wrong state to send */
+	if (!qp->valid)
+		goto drop;
+
+	if (is_request) {
+		if (qp->req.state != QP_STATE_READY)
+			goto drop;
+	} else {
+		if (qp->resp.state != QP_STATE_READY)
+			goto drop;
+	}
+
+	/*
+	 * busy wait for static rate control
+	 * we could refine this by yielding the tasklet
+	 * for larger delays and waiting out the small ones
+	 */
+	if (rxe->arbiter.delay) {
+		do {
+			getnstimeofday(&time);
+		} while (timespec_compare(&time, &rxe->arbiter.time) < 0);
+	}
+
+	/*
+	 * delay to impose before the next packet: proportional to this
+	 * packet's length, but never less than the per-packet minimum.
+	 * skb->len is read here, before the skb is handed to the interface.
+	 */
+	new_delay = (skb->len*rxe_nsec_per_kbyte) >> 10;
+	if (new_delay < rxe_nsec_per_packet)
+		new_delay = rxe_nsec_per_packet;
+
+	if (pkt->mask & RXE_LOOPBACK_MASK)
+		err = rxe->ifc_ops->loopback(rxe, skb);
+	else
+		err = rxe->ifc_ops->send(rxe, skb);
+
+	/*
+	 * we can recover from RXE_QUEUE_STOPPED errors
+	 * by retrying the packet. In other cases
+	 * the packet is consumed so move on
+	 */
+	if (err == RXE_QUEUE_STOPPED)
+		return err;
+	else if (err)
+		rxe->xmit_errors++;
+
+	/* record the earliest time the next packet may go out */
+	rxe->arbiter.delay = new_delay > 0;
+	if (rxe->arbiter.delay) {
+		getnstimeofday(&time);
+		time.tv_nsec += new_delay;
+		/* tv_nsec must stay below NSEC_PER_SEC, hence >= and not > */
+		while (time.tv_nsec >= NSEC_PER_SEC) {
+			time.tv_sec += 1;
+			time.tv_nsec -= NSEC_PER_SEC;
+		}
+		rxe->arbiter.time = time;
+	}
+
+	goto done;
+
+drop:
+	kfree_skb(skb);
+	err = 0;
+done:
+	account_skb(rxe, qp, is_request);
+	return err;
+}
+
+/*
+ * choose one packet for sending
+ *
+ * Takes the qp at the head of the arbiter list, dequeues one skb from its
+ * send queue and passes it to xmit_one_packet().
+ *
+ * @arg: the struct rxe_dev that owns the arbiter
+ *
+ * Returns 1 when the arbiter list is empty, or when the transmit attempt
+ * returned an error (the arbiter task is rescheduled before returning in
+ * that case); returns 0 otherwise.
+ */
+int rxe_arbiter(void *arg)
+{
+	int err = 0;
+	unsigned long flags;
+	struct rxe_dev *rxe = arg;
+	struct sk_buff *skb;
+	struct list_head *qpl;
+	struct rxe_qp *qp;
+
+	/* get the next qp's send queue */
+	spin_lock_irqsave(&rxe->arbiter.list_lock, flags);
+	if (list_empty(&rxe->arbiter.qp_list)) {
+		spin_unlock_irqrestore(&rxe->arbiter.list_lock, flags);
+		return 1;
+	}
+
+	qpl = rxe->arbiter.qp_list.next;
+	list_del_init(qpl);
+	qp = list_entry(qpl, struct rxe_qp, arbiter_list);
+	spin_unlock_irqrestore(&rxe->arbiter.list_lock, flags);
+
+	/*
+	 * get next packet from queue and try to send it
+	 * note skb could have already been removed
+	 */
+	skb = skb_dequeue(&qp->send_pkts);
+	if (skb) {
+		err = xmit_one_packet(rxe, qp, skb);
+		/* not consumed: put it back at the head so it is retried */
+		if (err == RXE_QUEUE_STOPPED)
+			skb_queue_head(&qp->send_pkts, skb);
+	}
+
+	/*
+	 * if more work in queue put qp back on the list
+	 * this is done on the error path too, so that a requeued packet
+	 * is found again when the rescheduled arbiter runs
+	 */
+	spin_lock_irqsave(&rxe->arbiter.list_lock, flags);
+
+	if (list_empty(qpl) && !skb_queue_empty(&qp->send_pkts))
+		list_add_tail(qpl, &rxe->arbiter.qp_list);
+
+	spin_unlock_irqrestore(&rxe->arbiter.list_lock, flags);
+
+	if (err) {
+		/* list_lock is not held here, so there is nothing to unlock */
+		rxe_run_task(&rxe->arbiter.task, 1);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * queue a packet for sending from a qp
+ *
+ * @rxe: device whose arbiter will transmit the packet
+ * @qp:  queue pair the packet belongs to
+ * @skb: packet; appended to the tail of qp->send_pkts
+ *
+ * After queueing, the qp is added to the arbiter list unless it is already
+ * on it, and the arbiter task is run.
+ */
+void arbiter_skb_queue(struct rxe_dev *rxe, struct rxe_qp *qp,
+		       struct sk_buff *skb)
+{
+	int must_sched;
+	unsigned long flags;
+
+	/* add packet to send queue */
+	skb_queue_tail(&qp->send_pkts, skb);
+
+	/* if not already there add qp to arbiter list */
+	spin_lock_irqsave(&rxe->arbiter.list_lock, flags);
+	if (list_empty(&qp->arbiter_list))
+		list_add_tail(&qp->arbiter_list, &rxe->arbiter.qp_list);
+	spin_unlock_irqrestore(&rxe->arbiter.list_lock, flags);
+
+	/*
+	 * run the arbiter, use tasklet unless only one packet
+	 * the count is taken from send_pkts, the queue just added to
+	 */
+	must_sched = skb_queue_len(&qp->send_pkts) > 1;
+	rxe_run_task(&rxe->arbiter.task, must_sched);
+}