diff mbox series

[net-next,v2,17/17] ethernet: ucc_geth: simplify rx/tx allocations

Message ID 20210119150802.19997-18-rasmus.villemoes@prevas.dk (mailing list archive)
State Accepted
Commit 9b0dfef4755301d9f7fcef63e2f64d23649bebb4
Delegated to: Netdev Maintainers
Headers show
Series ucc_geth improvements | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 1 maintainers not CCed: linuxppc-dev@lists.ozlabs.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 92 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Rasmus Villemoes Jan. 19, 2021, 3:08 p.m. UTC
Since kmalloc() is nowadays [1] guaranteed to return naturally
aligned (i.e., aligned to the size itself) memory for power-of-2
sizes, we don't need to over-allocate the align amount, compute an
aligned address within the allocation, and (for later freeing) also
storing the original pointer [2].

Instead, just round up the length we want to allocate to the alignment
requirements, then round that up to the next power of 2. In theory,
this could allocate up to about twice as much memory as we needed.  In
practice, (a) kmalloc() would in most cases anyway return a
power-of-2-sized allocation and (b) with the default values of the
bdRingLen[RT]x fields, the length is already itself a power of 2
greater than the alignment.

So we actually end up saving memory compared to the current
situtation (e.g. for tx, we currently allocate 128+32 bytes, which
kmalloc() likely rounds up to 192 or 256; with this patch, we just
allocate 128 bytes.) Also struct ucc_geth_private becomes a little
smaller.

[1] 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment for
kmalloc(power-of-two)")

[2] That storing was anyway done in a u32, which works on 32 bit
machines, but is not very elegant and certainly makes a reader of the
code pause for a while.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 50 ++++++++---------------
 drivers/net/ethernet/freescale/ucc_geth.h |  2 -
 2 files changed, 17 insertions(+), 35 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 9be1d4455a6b..ef4e2febeb5b 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -1835,7 +1835,7 @@  static void ucc_geth_free_rx(struct ucc_geth_private *ugeth)
 
 			kfree(ugeth->rx_skbuff[i]);
 
-			kfree((void *)ugeth->rx_bd_ring_offset[i]);
+			kfree(ugeth->p_rx_bd_ring[i]);
 			ugeth->p_rx_bd_ring[i] = NULL;
 		}
 	}
@@ -1872,10 +1872,8 @@  static void ucc_geth_free_tx(struct ucc_geth_private *ugeth)
 
 		kfree(ugeth->tx_skbuff[i]);
 
-		if (ugeth->p_tx_bd_ring[i]) {
-			kfree((void *)ugeth->tx_bd_ring_offset[i]);
-			ugeth->p_tx_bd_ring[i] = NULL;
-		}
+		kfree(ugeth->p_tx_bd_ring[i]);
+		ugeth->p_tx_bd_ring[i] = NULL;
 	}
 
 }
@@ -2150,25 +2148,15 @@  static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth)
 
 	/* Allocate Tx bds */
 	for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) {
-		u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT;
-
-		/* Allocate in multiple of
-		   UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT,
-		   according to spec */
-		length = ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd))
-			  / UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT)
-		    * UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT;
-		if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) %
-		    UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT)
-			length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT;
-
-		ugeth->tx_bd_ring_offset[j] =
-			(u32) kmalloc((u32) (length + align), GFP_KERNEL);
-
-		if (ugeth->tx_bd_ring_offset[j] != 0)
-			ugeth->p_tx_bd_ring[j] =
-				(u8 __iomem *)((ugeth->tx_bd_ring_offset[j] +
-						align) & ~(align - 1));
+		u32 align = max(UCC_GETH_TX_BD_RING_ALIGNMENT,
+				UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT);
+		u32 alloc;
+
+		length = ug_info->bdRingLenTx[j] * sizeof(struct qe_bd);
+		alloc = round_up(length, align);
+		alloc = roundup_pow_of_two(alloc);
+
+		ugeth->p_tx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL);
 
 		if (!ugeth->p_tx_bd_ring[j]) {
 			if (netif_msg_ifup(ugeth))
@@ -2176,9 +2164,7 @@  static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth)
 			return -ENOMEM;
 		}
 		/* Zero unused end of bd ring, according to spec */
-		memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] +
-		       ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0,
-		       length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd));
+		memset(ugeth->p_tx_bd_ring[j] + length, 0, alloc - length);
 	}
 
 	/* Init Tx bds */
@@ -2225,15 +2211,13 @@  static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
 	/* Allocate Rx bds */
 	for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) {
 		u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT;
+		u32 alloc;
 
 		length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd);
-		ugeth->rx_bd_ring_offset[j] =
-			(u32) kmalloc((u32) (length + align), GFP_KERNEL);
-		if (ugeth->rx_bd_ring_offset[j] != 0)
-			ugeth->p_rx_bd_ring[j] =
-				(u8 __iomem *)((ugeth->rx_bd_ring_offset[j] +
-						align) & ~(align - 1));
+		alloc = round_up(length, align);
+		alloc = roundup_pow_of_two(alloc);
 
+		ugeth->p_rx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL);
 		if (!ugeth->p_rx_bd_ring[j]) {
 			if (netif_msg_ifup(ugeth))
 				pr_err("Can not allocate memory for Rx bd rings\n");
diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index 6539fed9cc22..ccc4ca1ae9b6 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -1182,9 +1182,7 @@  struct ucc_geth_private {
 	struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl;
 	u32 rx_bd_qs_tbl_offset;
 	u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES];
-	u32 tx_bd_ring_offset[NUM_TX_QUEUES];
 	u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES];
-	u32 rx_bd_ring_offset[NUM_RX_QUEUES];
 	u8 __iomem *confBd[NUM_TX_QUEUES];
 	u8 __iomem *txBd[NUM_TX_QUEUES];
 	u8 __iomem *rxBd[NUM_RX_QUEUES];