ag71xx: Reorder ag71xx struct members for better cache performance
author Rosen Penev <rosenp@gmail.com>
Mon, 4 Dec 2017 19:40:23 +0000 (11:40 -0800)
committer Felix Fietkau <nbd@nbd.name>
Mon, 5 Feb 2018 09:16:25 +0000 (10:16 +0100)
Qualcomm claims this improves the D-cache footprint. Original commit message below:

From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 10:57:28 -0500
Subject: [ag71xx] cluster/align structs for cache perf

Cluster the frequently used, per-packet structures in ag71xx near
to each other, and cacheline-align them.  Some other re-ordering
occurred to move "warmer" structures near the per-packet structures.

Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Rosen Penev <rosenp@gmail.com>
target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h

index a7128129163e92a6cf789d2316a739758e5cf698..5ead6b316a3cba80ab9e6e238f535ff90db63b50 100644 (file)
@@ -153,20 +153,31 @@ struct ag71xx_debug {
 };
 
 struct ag71xx {
 };
 
 struct ag71xx {
-       void __iomem            *mac_base;
+       /*
+        * Critical data related to the per-packet data path are clustered
+        * early in this structure to help improve the D-cache footprint.
+        */
+       struct ag71xx_ring      rx_ring ____cacheline_aligned;
+       struct ag71xx_ring      tx_ring ____cacheline_aligned;
+
+       unsigned int            max_frame_len;
+       unsigned int            desc_pktlen_mask;
+       unsigned int            rx_buf_size;
 
 
-       spinlock_t              lock;
-       struct platform_device  *pdev;
        struct net_device       *dev;
        struct net_device       *dev;
+       struct platform_device  *pdev;
+       spinlock_t              lock;
        struct napi_struct      napi;
        u32                     msg_enable;
 
        struct napi_struct      napi;
        u32                     msg_enable;
 
+       /*
+        * From this point onwards we're not looking at per-packet fields.
+        */
+       void __iomem            *mac_base;
+
        struct ag71xx_desc      *stop_desc;
        dma_addr_t              stop_desc_dma;
 
        struct ag71xx_desc      *stop_desc;
        dma_addr_t              stop_desc_dma;
 
-       struct ag71xx_ring      rx_ring;
-       struct ag71xx_ring      tx_ring;
-
        struct mii_bus          *mii_bus;
        struct phy_device       *phy_dev;
        void                    *phy_priv;
        struct mii_bus          *mii_bus;
        struct phy_device       *phy_dev;
        void                    *phy_priv;
@@ -175,10 +186,6 @@ struct ag71xx {
        unsigned int            speed;
        int                     duplex;
 
        unsigned int            speed;
        int                     duplex;
 
-       unsigned int            max_frame_len;
-       unsigned int            desc_pktlen_mask;
-       unsigned int            rx_buf_size;
-
        struct delayed_work     restart_work;
        struct delayed_work     link_work;
        struct timer_list       oom_timer;
        struct delayed_work     restart_work;
        struct delayed_work     link_work;
        struct timer_list       oom_timer;