ag71xx: Reorder ag71xx struct members for better cache performance
[openwrt/openwrt.git] / target / linux / ar71xx / files / drivers / net / ethernet / atheros / ag71xx / ag71xx.h
index 606c6f1c057e8e3f3b2f3e3c0f503c4471edf66b..5ead6b316a3cba80ab9e6e238f535ff90db63b50 100644 (file)
 #define AG71XX_DRV_NAME                "ag71xx"
 #define AG71XX_DRV_VERSION     "0.5.35"
 
-#define AG71XX_NAPI_WEIGHT     64
+/*
+ * For our NAPI weight, bigger does *NOT* mean better - it means more
+ * D-cache misses and lots more wasted cycles than we'll ever
+ * possibly gain from saving instructions.
+ */
+#define AG71XX_NAPI_WEIGHT     32
 #define AG71XX_OOM_REFILL      (1 + HZ/10)
 
 #define AG71XX_INT_ERR (AG71XX_INT_RX_BE | AG71XX_INT_TX_BE)
 #define AG71XX_INT_INIT        (AG71XX_INT_ERR | AG71XX_INT_POLL)
 
 #define AG71XX_TX_MTU_LEN      1540
-#define AG71XX_RX_PKT_SIZE     \
-       (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
-#define AG71XX_RX_BUF_SIZE (AG71XX_RX_PKT_SIZE + NET_SKB_PAD + NET_IP_ALIGN)
 
-#define AG71XX_TX_RING_SIZE_DEFAULT    32
-#define AG71XX_RX_RING_SIZE_DEFAULT    128
+#define AG71XX_TX_RING_SPLIT           512
+#define AG71XX_TX_RING_DS_PER_PKT      DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+                                                    AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT    128
+#define AG71XX_RX_RING_SIZE_DEFAULT    256
 
-#define AG71XX_TX_RING_SIZE_MAX                32
-#define AG71XX_RX_RING_SIZE_MAX                128
+#define AG71XX_TX_RING_SIZE_MAX                128
+#define AG71XX_RX_RING_SIZE_MAX                256
 
 #ifdef CONFIG_AG71XX_DEBUG
 #define DBG(fmt, args...)      pr_debug(fmt, ## args)
@@ -85,12 +90,14 @@ struct ag71xx_desc {
        u32     pad;
 } __attribute__((aligned(4)));
 
+#define AG71XX_DESC_SIZE       roundup(sizeof(struct ag71xx_desc), \
+                                       L1_CACHE_BYTES)
+
 struct ag71xx_buf {
        union {
                struct sk_buff  *skb;
                void            *rx_buf;
        };
-       struct ag71xx_desc      *desc;
        union {
                dma_addr_t      dma_addr;
                unsigned long   timestamp;
@@ -102,10 +109,10 @@ struct ag71xx_ring {
        struct ag71xx_buf       *buf;
        u8                      *descs_cpu;
        dma_addr_t              descs_dma;
-       unsigned int            desc_size;
+       u16                     desc_split;
+       u16                     order;
        unsigned int            curr;
        unsigned int            dirty;
-       unsigned int            size;
 };
 
 struct ag71xx_mdio {
@@ -146,20 +153,31 @@ struct ag71xx_debug {
 };
 
 struct ag71xx {
-       void __iomem            *mac_base;
+       /*
+        * Critical data related to the per-packet data path are clustered
+        * early in this structure to help improve the D-cache footprint.
+        */
+       struct ag71xx_ring      rx_ring ____cacheline_aligned;
+       struct ag71xx_ring      tx_ring ____cacheline_aligned;
+
+       unsigned int            max_frame_len;
+       unsigned int            desc_pktlen_mask;
+       unsigned int            rx_buf_size;
 
-       spinlock_t              lock;
-       struct platform_device  *pdev;
        struct net_device       *dev;
+       struct platform_device  *pdev;
+       spinlock_t              lock;
        struct napi_struct      napi;
        u32                     msg_enable;
 
+       /*
+        * From this point onwards we're not looking at per-packet fields.
+        */
+       void __iomem            *mac_base;
+
        struct ag71xx_desc      *stop_desc;
        dma_addr_t              stop_desc_dma;
 
-       struct ag71xx_ring      rx_ring;
-       struct ag71xx_ring      tx_ring;
-
        struct mii_bus          *mii_bus;
        struct phy_device       *phy_dev;
        void                    *phy_priv;
@@ -168,7 +186,7 @@ struct ag71xx {
        unsigned int            speed;
        int                     duplex;
 
-       struct work_struct      restart_work;
+       struct delayed_work     restart_work;
        struct delayed_work     link_work;
        struct timer_list       oom_timer;
 
@@ -198,9 +216,16 @@ static inline int ag71xx_desc_empty(struct ag71xx_desc *desc)
        return (desc->ctrl & DESC_EMPTY) != 0;
 }
 
-static inline int ag71xx_desc_pktlen(struct ag71xx_desc *desc)
+static inline struct ag71xx_desc *
+ag71xx_ring_desc(struct ag71xx_ring *ring, int idx)
+{
+       return (struct ag71xx_desc *) &ring->descs_cpu[idx * AG71XX_DESC_SIZE];
+}
+
+static inline int
+ag71xx_ring_size_order(int size)
 {
-       return desc->ctrl & DESC_PKTLEN_M;
+       return fls(size - 1);
 }
 
 /* Register offsets */