diff mbox

[v2,1/2] cxgb4: Add support for Inline IPSec Tx

Message ID 1510226919-20431-1-git-send-email-atul.gupta@chelsio.com (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Headers show

Commit Message

Atul Gupta Nov. 9, 2017, 11:28 a.m. UTC
Added Tx routine for ULD
- define interface for ULD Tx.

Export routines used for Tx data
- Routines common for data transmit are used by cxgb4 and chcr
  drivers.
- EXPORT routines enable transmit from chcr driver.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Harsh Jain <harsh@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
V2: Fixed the build warnings and created patch against cryptodev
    to avoid possible merge conflicts
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |  23 +++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |   2 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c     |   1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h     |   3 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c           | 102 ++++++++++-----------
 6 files changed, 81 insertions(+), 52 deletions(-)
diff mbox

Patch

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ea72d2d..0aa681e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -58,6 +58,13 @@ 
 extern struct list_head adapter_list;
 extern struct mutex uld_mutex;
 
+/* Suspend an Ethernet Tx queue with fewer available descriptors than this.
+ * This is the same as calc_tx_descs() for a TSO packet with
+ * nr_frags == MAX_SKB_FRAGS.
+ */
+#define ETHTXQ_STOP_THRES \
+	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
+
 enum {
 	MAX_NPORTS	= 4,     /* max # of ports */
 	SERNUM_LEN	= 24,    /* Serial # length */
@@ -553,6 +560,7 @@  enum {                                 /* adapter flags */
 
 enum {
 	ULP_CRYPTO_LOOKASIDE = 1 << 0,
+	ULP_CRYPTO_IPSEC_INLINE = 1 << 1,
 };
 
 struct rx_sw_desc;
@@ -947,6 +955,11 @@  enum {
 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 };
 
+struct tx_sw_desc {                /* SW state per Tx descriptor */
+	struct sk_buff *skb;
+	struct ulptx_sgl *sgl;
+};
+
 /* Support for "sched_queue" command to allow one or more NIC TX Queues
  * to be bound to a TX Scheduling Class.
  */
@@ -1627,4 +1640,14 @@  int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap);
 void free_txq(struct adapter *adap, struct sge_txq *q);
+void cxgb4_reclaim_completed_tx(struct adapter *adap,
+				struct sge_txq *q, bool unmap);
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr);
+void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
+			 void *pos);
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr);
+void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 76540b0..b45bea9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3095,6 +3095,8 @@  static int chcr_show(struct seq_file *seq, void *v)
 		   atomic_read(&adap->chcr_stats.error));
 	seq_printf(seq, "Fallback: %10u \n",
 		   atomic_read(&adap->chcr_stats.fallback));
+	seq_printf(seq, "IPSec PDU: %10u\n",
+		   atomic_read(&adap->chcr_stats.ipsec_cnt));
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 92d9d79..2243e31 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4005,7 +4005,7 @@  static int adap_init0(struct adapter *adap)
 		} else {
 			adap->vres.ncrypto_fc = val[0];
 		}
-		adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+		adap->params.crypto = ntohs(caps_cmd.cryptocaps);
 		adap->num_uld += 1;
 	}
 #undef FW_PARAM_PFVF
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 71a315b..6b5fea4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -637,6 +637,7 @@  static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
+	lld->crypto = adap->params.crypto;
 	lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
 	lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
 	lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 84541fc..a91b295 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -292,6 +292,7 @@  struct chcr_stats_debug {
 	atomic_t complete;
 	atomic_t error;
 	atomic_t fallback;
+	atomic_t ipsec_cnt;
 };
 
 #define OCQ_WIN_OFFSET(pdev, vres) \
@@ -317,6 +318,7 @@  struct cxgb4_lld_info {
 	unsigned char wr_cred;               /* WR 16-byte credits */
 	unsigned char adapter_type;          /* type of adapter */
 	unsigned char fw_api_ver;            /* FW API version */
+	unsigned char crypto;                /* crypto support */
 	unsigned int fw_vers;                /* FW version */
 	unsigned int iscsi_iolen;            /* iSCSI max I/O length */
 	unsigned int cclk_ps;                /* Core clock period in psec */
@@ -365,6 +367,7 @@  struct cxgb4_uld_info {
 			      struct t4_lro_mgr *lro_mgr,
 			      struct napi_struct *napi);
 	void (*lro_flush)(struct t4_lro_mgr *);
+	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6a934c7..7b4f61b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -41,6 +41,7 @@ 
 #include <linux/jiffies.h>
 #include <linux/prefetch.h>
 #include <linux/export.h>
+#include <net/xfrm.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -53,6 +54,7 @@ 
 #include "t4_msg.h"
 #include "t4fw_api.h"
 #include "cxgb4_ptp.h"
+#include "cxgb4_uld.h"
 
 /*
  * Rx buffer size.  We use largish buffers if possible but settle for single
@@ -110,14 +112,6 @@ 
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
 /*
- * Suspend an Ethernet Tx queue with fewer available descriptors than this.
- * This is the same as calc_tx_descs() for a TSO packet with
- * nr_frags == MAX_SKB_FRAGS.
- */
-#define ETHTXQ_STOP_THRES \
-	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
-
-/*
  * Suspension threshold for non-Ethernet Tx queues.  We require enough room
  * for a full sized WR.
  */
@@ -134,11 +128,6 @@ 
  */
 #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
 
-struct tx_sw_desc {                /* SW state per Tx descriptor */
-	struct sk_buff *skb;
-	struct ulptx_sgl *sgl;
-};
-
 struct rx_sw_desc {                /* SW state per Rx descriptor */
 	struct page *page;
 	dma_addr_t dma_addr;
@@ -248,8 +237,8 @@  static inline bool fl_starving(const struct adapter *adapter,
 	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
-static int map_skb(struct device *dev, const struct sk_buff *skb,
-		   dma_addr_t *addr)
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr)
 {
 	const skb_frag_t *fp, *end;
 	const struct skb_shared_info *si;
@@ -277,6 +266,7 @@  static int map_skb(struct device *dev, const struct sk_buff *skb,
 out_err:
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(cxgb4_map_skb);
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 static void unmap_skb(struct device *dev, const struct sk_buff *skb,
@@ -411,7 +401,7 @@  static inline int reclaimable(const struct sge_txq *q)
 }
 
 /**
- *	reclaim_completed_tx - reclaims completed Tx descriptors
+ *	cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
  *	@adap: the adapter
  *	@q: the Tx queue to reclaim completed descriptors from
  *	@unmap: whether the buffers should be unmapped for DMA
@@ -420,7 +410,7 @@  static inline int reclaimable(const struct sge_txq *q)
  *	and frees the associated buffers if possible.  Called with the Tx
  *	queue locked.
  */
-static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 					bool unmap)
 {
 	int avail = reclaimable(q);
@@ -437,6 +427,7 @@  static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 		q->in_use -= avail;
 	}
 }
+EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
 
 static inline int get_buf_size(struct adapter *adapter,
 			       const struct rx_sw_desc *d)
@@ -833,7 +824,7 @@  static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 }
 
 /**
- *	write_sgl - populate a scatter/gather list for a packet
+ *	cxgb4_write_sgl - populate a scatter/gather list for a packet
  *	@skb: the packet
  *	@q: the Tx queue we are writing into
  *	@sgl: starting location for writing the SGL
@@ -849,9 +840,9 @@  static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
  *	right after the end of the SGL but does not account for any potential
  *	wrap around, i.e., @end > @sgl.
  */
-static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
-		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
-		      const dma_addr_t *addr)
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr)
 {
 	unsigned int i, len;
 	struct ulptx_sge_pair *to;
@@ -903,6 +894,7 @@  static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
 		*end = 0;
 }
+EXPORT_SYMBOL(cxgb4_write_sgl);
 
 /* This function copies 64 byte coalesced work request to
  * memory mapped BAR2 space. For coalesced WR SGE fetches
@@ -921,14 +913,14 @@  static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
 }
 
 /**
- *	ring_tx_db - check and potentially ring a Tx queue's doorbell
+ *	cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell
  *	@adap: the adapter
  *	@q: the Tx queue
  *	@n: number of new descriptors to give to HW
  *
  *	Ring the doorbel for a Tx queue.
  */
-static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
+inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
 	/* Make sure that all writes to the TX Descriptors are committed
 	 * before we tell the hardware about them.
@@ -995,9 +987,10 @@  static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 		wmb();
 	}
 }
+EXPORT_SYMBOL(cxgb4_ring_tx_db);
 
 /**
- *	inline_tx_skb - inline a packet's data into Tx descriptors
+ *	cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors
  *	@skb: the packet
  *	@q: the Tx queue where the packet will be inlined
  *	@pos: starting position in the Tx queue where to inline the packet
@@ -1007,8 +1000,8 @@  static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
  *	Most of the complexity of this operation is dealing with wrap arounds
  *	in the middle of the packet we want to inline.
  */
-static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
-			  void *pos)
+void cxgb4_inline_tx_skb(const struct sk_buff *skb,
+			 const struct sge_txq *q, void *pos)
 {
 	u64 *p;
 	int left = (void *)q->stat - pos;
@@ -1030,6 +1023,7 @@  static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
 	if ((uintptr_t)p & 8)
 		*p = 0;
 }
+EXPORT_SYMBOL(cxgb4_inline_tx_skb);
 
 static void *inline_tx_skb_header(const struct sk_buff *skb,
 				  const struct sge_txq *q,  void *pos,
@@ -1199,6 +1193,12 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
+	ssi = skb_shinfo(skb);
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	if (xfrm_offload(skb) && !ssi->gso_size)
+		return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+#endif /* CHELSIO_IPSEC_INLINE */
+
 	qidx = skb_get_queue_mapping(skb);
 	if (ptp_enabled) {
 		spin_lock(&adap->ptp_lock);
@@ -1215,7 +1215,7 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 	skb_tx_timestamp(skb);
 
-	reclaim_completed_tx(adap, &q->q, true);
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1245,7 +1245,7 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		immediate = true;
 
 	if (!immediate &&
-	    unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
 		q->mapping_err++;
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
@@ -1264,7 +1264,6 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
-	ssi = skb_shinfo(skb);
 	if (ssi->gso_size) {
 		struct cpl_tx_pkt_lso *lso = (void *)wr;
 		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
@@ -1341,13 +1340,13 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	cpl->ctrl1 = cpu_to_be64(cntrl);
 
 	if (immediate) {
-		inline_tx_skb(skb, &q->q, cpl + 1);
+		cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
 		dev_consume_skb_any(skb);
 	} else {
 		int last_desc;
 
-		write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
-			  addr);
+		cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
+				end, 0, addr);
 		skb_orphan(skb);
 
 		last_desc = q->q.pidx + ndesc - 1;
@@ -1359,7 +1358,7 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	txq_advance(&q->q, ndesc);
 
-	ring_tx_db(adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
 	if (ptp_enabled)
 		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
@@ -1369,9 +1368,9 @@  netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
  *	@q: the SGE control Tx queue
  *
- *	This is a variant of reclaim_completed_tx() that is used for Tx queues
- *	that send only immediate data (presently just the control queues) and
- *	thus do not have any sk_buffs to release.
+ *	This is a variant of cxgb4_reclaim_completed_tx() that is used
+ *	for Tx queues that send only immediate data (presently just
+ *	the control queues) and	thus do not have any sk_buffs to release.
  */
 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
 {
@@ -1446,13 +1445,13 @@  static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
 	}
 
 	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
-	inline_tx_skb(skb, &q->q, wr);
+	cxgb4_inline_tx_skb(skb, &q->q, wr);
 
 	txq_advance(&q->q, ndesc);
 	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
 		ctrlq_check_stop(q, wr);
 
-	ring_tx_db(q->adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
 	spin_unlock(&q->sendq.lock);
 
 	kfree_skb(skb);
@@ -1487,7 +1486,7 @@  static void restart_ctrlq(unsigned long data)
 		txq_advance(&q->q, ndesc);
 		spin_unlock(&q->sendq.lock);
 
-		inline_tx_skb(skb, &q->q, wr);
+		cxgb4_inline_tx_skb(skb, &q->q, wr);
 		kfree_skb(skb);
 
 		if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
@@ -1500,14 +1499,15 @@  static void restart_ctrlq(unsigned long data)
 			}
 		}
 		if (written > 16) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 		spin_lock(&q->sendq.lock);
 	}
 	q->full = 0;
-ringdb: if (written)
-		ring_tx_db(q->adap, &q->q, written);
+ringdb:
+	if (written)
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 	spin_unlock(&q->sendq.lock);
 }
 
@@ -1650,7 +1650,7 @@  static void service_ofldq(struct sge_uld_txq *q)
 		 */
 		spin_unlock(&q->sendq.lock);
 
-		reclaim_completed_tx(q->adap, &q->q, false);
+		cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
 
 		flits = skb->priority;                /* previously saved */
 		ndesc = flits_to_desc(flits);
@@ -1661,9 +1661,9 @@  static void service_ofldq(struct sge_uld_txq *q)
 
 		pos = (u64 *)&q->q.desc[q->q.pidx];
 		if (is_ofld_imm(skb))
-			inline_tx_skb(skb, &q->q, pos);
-		else if (map_skb(q->adap->pdev_dev, skb,
-				 (dma_addr_t *)skb->head)) {
+			cxgb4_inline_tx_skb(skb, &q->q, pos);
+		else if (cxgb4_map_skb(q->adap->pdev_dev, skb,
+				       (dma_addr_t *)skb->head)) {
 			txq_stop_maperr(q);
 			spin_lock(&q->sendq.lock);
 			break;
@@ -1694,9 +1694,9 @@  static void service_ofldq(struct sge_uld_txq *q)
 				pos = (void *)txq->desc;
 			}
 
-			write_sgl(skb, &q->q, (void *)pos,
-				  end, hdr_len,
-				  (dma_addr_t *)skb->head);
+			cxgb4_write_sgl(skb, &q->q, (void *)pos,
+					end, hdr_len,
+					(dma_addr_t *)skb->head);
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 			skb->dev = q->adap->port[0];
 			skb->destructor = deferred_unmap_destructor;
@@ -1710,7 +1710,7 @@  static void service_ofldq(struct sge_uld_txq *q)
 		txq_advance(&q->q, ndesc);
 		written += ndesc;
 		if (unlikely(written > 32)) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 
@@ -1725,7 +1725,7 @@  static void service_ofldq(struct sge_uld_txq *q)
 			kfree_skb(skb);
 	}
 	if (likely(written))
-		ring_tx_db(q->adap, &q->q, written);
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 
 	/*Indicate that no thread is processing the Pending Send Queue
 	 * currently.