diff mbox

[3/3,v3] net/macb: Try to optimize struct macb layout

Message ID d0bc206ace3baaf681e111ba07c4d0b576839bb4.1368608928.git.nicolas.ferre@atmel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nicolas Ferre May 15, 2013, 9:18 a.m. UTC
From: Havard Skinnemoen <havard@skinnemoen.net>

Move TX-related fields to the top of the struct so that they end up on
the same cache line. Move the NAPI struct below that since it is used
from the interrupt handler. This field is also marked as
___cacheline_aligned_in_smp.
RX-related fields go below those.
Function pointers and capability mask are immediately after that as
they are also used in the hot path.

Move the spinlock before regs since they are usually used together.

Signed-off-by: Havard Skinnemoen <havard@skinnemoen.net>
[nicolas.ferre@atmel.com: adapt to newer kernel]
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/net/ethernet/cadence/macb.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

Comments

David Laight May 15, 2013, 9:37 a.m. UTC | #1
> From: Havard Skinnemoen <havard@skinnemoen.net>

> Move the NAPI struct below that since it is used
> from the interrupt handler. This field is also marked as
> ___cacheline_aligned_in_smp.

Is that a good idea if the cache line size is 256 bytes?
Indeed is that ever sane - except for a few special structures
that really need it, and are guaranteed to be allocated
with that much alignment.

	David
David Laight May 15, 2013, 9:38 a.m. UTC | #2
> Move TX-related fields to the top of the struct so that they end up on
> the same cache line. Move the NAPI struct below that since it is used
> from the interrupt handler. This field is also marked as
> ___cacheline_aligned_in_smp.
> RX-related fields go below those.
> Function pointers and capability mask are immediately after that as
> they are also used in the hot path.
> 
> Move the spinlock before regs since they are usually used together.

Haven't you introduced some holes in the structure?
I suspect that spinloack_t might only be 32bits on
some 64bit systems.
There is certainly one where 'caps' used to be.

...> 
>  struct macb {
> +	spinlock_t		lock;
>  	void __iomem		*regs;
...
> +	struct macb_or_gem_ops	macbgem_ops;
> +
> +	u32			caps;
> 
> -	spinlock_t		lock;
>  	struct platform_device	*pdev;

	David
Nicolas Ferre May 28, 2013, 8:08 a.m. UTC | #3
On 15/05/2013 11:37, David Laight :
>> From: Havard Skinnemoen <havard@skinnemoen.net>
>
>> Move the NAPI struct below that since it is used
>> from the interrupt handler. This field is also marked as
>> ___cacheline_aligned_in_smp.
>
> Is that a good idea if the cache line size is 256 bytes?
> Indeed is that ever sane - except for a few special structures
> that really need it, and are guaranteed to be allocated
> with that much alignment.

I do not know if that was really needed, it was a recommendation by Ben.
(previous email here:
https://patchwork.kernel.org/patch/1887101/
)

Anyway, it seems that this patch needs more work whereas the others of 
the series are not commented.

So maybe, I can resend patches 1, 2 for making this big enhancement go 
forward (as it is in review process for quite a long time...).
David, do you want me to resend or can you retrieve these v3 patches in 
patchwork?

Best regards,
diff mbox

Patch

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index f407615..6c8e38a2 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -555,46 +555,47 @@  struct macb_or_gem_ops {
 };
 
 struct macb {
+	spinlock_t		lock;
 	void __iomem		*regs;
 
+	unsigned int		tx_head;
+	unsigned int		tx_tail;
+	struct macb_dma_desc	*tx_ring;
+	struct macb_tx_skb	*tx_skb;
+	dma_addr_t		tx_ring_dma;
+	struct work_struct	tx_error_task;
+
+	struct napi_struct	napi ____cacheline_aligned_in_smp;
+
 	unsigned int		rx_tail;
 	unsigned int		rx_prepared_head;
 	struct macb_dma_desc	*rx_ring;
 	struct sk_buff		**rx_skbuff;
 	void			*rx_buffers;
 	size_t			rx_buffer_size;
+	dma_addr_t		rx_ring_dma;
+	dma_addr_t		rx_buffers_dma;
 
-	unsigned int		tx_head, tx_tail;
-	struct macb_dma_desc	*tx_ring;
-	struct macb_tx_skb	*tx_skb;
+	struct macb_or_gem_ops	macbgem_ops;
+
+	u32			caps;
 
-	spinlock_t		lock;
 	struct platform_device	*pdev;
 	struct clk		*pclk;
 	struct clk		*hclk;
 	struct net_device	*dev;
-	struct napi_struct	napi;
-	struct work_struct	tx_error_task;
 	struct net_device_stats	stats;
 	union {
 		struct macb_stats	macb;
 		struct gem_stats	gem;
 	}			hw_stats;
 
-	dma_addr_t		rx_ring_dma;
-	dma_addr_t		tx_ring_dma;
-	dma_addr_t		rx_buffers_dma;
-
-	struct macb_or_gem_ops	macbgem_ops;
-
 	struct mii_bus		*mii_bus;
 	struct phy_device	*phy_dev;
 	unsigned int 		link;
 	unsigned int 		speed;
 	unsigned int 		duplex;
 
-	u32			caps;
-
 	phy_interface_t		phy_interface;
 
 	/* AT91RM9200 transmit */