Upgrade b43 and mac80211.
[openwrt/staging/lynxis/omap.git] / package / b43 / src / dma.c
index 5e8f8ac0f1dda1f2b38f26f13145fef370a95b00..3dfb28a34be9f8259c75bf5490a703cd88303a02 100644 (file)
@@ -37,6 +37,8 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
 
 /* 32bit DMA ops. */
 static
@@ -165,7 +167,7 @@ static void op64_fill_descriptor(struct b43_dmaring *ring,
        addrhi = (((u64) dmaaddr >> 32) & ~SSB_DMA_TRANSLATION_MASK);
        addrext = (((u64) dmaaddr >> 32) & SSB_DMA_TRANSLATION_MASK)
            >> SSB_DMA_TRANSLATION_SHIFT;
-       addrhi |= ssb_dma_translation(ring->dev->dev);
+       addrhi |= (ssb_dma_translation(ring->dev->dev) << 1);
        if (slot == ring->nr_slots - 1)
                ctl0 |= B43_DMA64_DCTL0_DTABLEEND;
        if (start)
@@ -315,29 +317,27 @@ static struct b43_dmaring *priority_to_txring(struct b43_wldev *dev,
        case 3:
                ring = dev->dma.tx_ring0;
                break;
-       case 4:
-               ring = dev->dma.tx_ring4;
-               break;
-       case 5:
-               ring = dev->dma.tx_ring5;
-               break;
        }
 
        return ring;
 }
 
-/* Bcm43xx-ring to mac80211-queue mapping */
+/* b43-ring to mac80211-queue mapping */
 static inline int txring_to_priority(struct b43_dmaring *ring)
 {
-       static const u8 idx_to_prio[] = { 3, 2, 1, 0, 4, 5, };
+       static const u8 idx_to_prio[] = { 3, 2, 1, 0, };
+       unsigned int index;
 
 /*FIXME: have only one queue, for now */
        return 0;
 
-       return idx_to_prio[ring->index];
+       index = ring->index;
+       if (B43_WARN_ON(index >= ARRAY_SIZE(idx_to_prio)))
+               index = 0;
+       return idx_to_prio[index];
 }
 
-u16 b43_dmacontroller_base(int dma64bit, int controller_idx)
+static u16 b43_dmacontroller_base(enum b43_dmatype type, int controller_idx)
 {
        static const u16 map64[] = {
                B43_MMIO_DMA64_BASE0,
@@ -356,7 +356,7 @@ u16 b43_dmacontroller_base(int dma64bit, int controller_idx)
                B43_MMIO_DMA32_BASE5,
        };
 
-       if (dma64bit) {
+       if (type == B43_DMA_64BIT) {
                B43_WARN_ON(!(controller_idx >= 0 &&
                              controller_idx < ARRAY_SIZE(map64)));
                return map64[controller_idx];
@@ -426,9 +426,21 @@ static inline
 static int alloc_ringmemory(struct b43_dmaring *ring)
 {
        struct device *dev = ring->dev->dev->dev;
-
+       gfp_t flags = GFP_KERNEL;
+
+       /* The specs call for 4K buffers for 30- and 32-bit DMA with 4K
+        * alignment and 8K buffers for 64-bit DMA with 8K alignment. Testing
+        * has shown that 4K is sufficient for the latter as long as the buffer
+        * does not cross an 8K boundary.
+        *
+        * For unknown reasons - possibly a hardware error - the BCM4311 rev
+        * 02, which uses 64-bit DMA, needs the ring buffer in very low memory,
+        * which accounts for the GFP_DMA flag below.
+        */
+       if (ring->type == B43_DMA_64BIT)
+               flags |= GFP_DMA;
        ring->descbase = dma_alloc_coherent(dev, B43_DMA_RINGMEMSIZE,
-                                           &(ring->dmabase), GFP_KERNEL);
+                                           &(ring->dmabase), flags);
        if (!ring->descbase) {
                b43err(ring->dev->wl, "DMA ringmemory allocation failed\n");
                return -ENOMEM;
@@ -447,7 +459,8 @@ static void free_ringmemory(struct b43_dmaring *ring)
 }
 
 /* Reset the RX DMA channel */
-int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
+static int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base,
+                                     enum b43_dmatype type)
 {
        int i;
        u32 value;
@@ -455,12 +468,13 @@ int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
 
        might_sleep();
 
-       offset = dma64 ? B43_DMA64_RXCTL : B43_DMA32_RXCTL;
+       offset = (type == B43_DMA_64BIT) ? B43_DMA64_RXCTL : B43_DMA32_RXCTL;
        b43_write32(dev, mmio_base + offset, 0);
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_RXSTATUS : B43_DMA32_RXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_RXSTATUS :
+                                                  B43_DMA32_RXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_RXSTAT;
                        if (value == B43_DMA64_RXSTAT_DISABLED) {
                                i = -1;
@@ -483,8 +497,9 @@ int b43_dmacontroller_rx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
        return 0;
 }
 
-/* Reset the RX DMA channel */
-int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
+/* Reset the TX DMA channel */
+static int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base,
+                                     enum b43_dmatype type)
 {
        int i;
        u32 value;
@@ -493,9 +508,10 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
        might_sleep();
 
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_TXSTATUS : B43_DMA32_TXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXSTATUS :
+                                                  B43_DMA32_TXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_TXSTAT;
                        if (value == B43_DMA64_TXSTAT_DISABLED ||
                            value == B43_DMA64_TXSTAT_IDLEWAIT ||
@@ -510,12 +526,13 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
                }
                msleep(1);
        }
-       offset = dma64 ? B43_DMA64_TXCTL : B43_DMA32_TXCTL;
+       offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXCTL : B43_DMA32_TXCTL;
        b43_write32(dev, mmio_base + offset, 0);
        for (i = 0; i < 10; i++) {
-               offset = dma64 ? B43_DMA64_TXSTATUS : B43_DMA32_TXSTATUS;
+               offset = (type == B43_DMA_64BIT) ? B43_DMA64_TXSTATUS :
+                                                  B43_DMA32_TXSTATUS;
                value = b43_read32(dev, mmio_base + offset);
-               if (dma64) {
+               if (type == B43_DMA_64BIT) {
                        value &= B43_DMA64_TXSTAT;
                        if (value == B43_DMA64_TXSTAT_DISABLED) {
                                i = -1;
@@ -540,6 +557,33 @@ int b43_dmacontroller_tx_reset(struct b43_wldev *dev, u16 mmio_base, int dma64)
        return 0;
 }
 
+/* Check if a DMA mapping address is invalid. */
+static bool b43_dma_mapping_error(struct b43_dmaring *ring,
+                                 dma_addr_t addr,
+                                 size_t buffersize)
+{
+       if (unlikely(dma_mapping_error(addr)))
+               return 1;
+
+       switch (ring->type) {
+       case B43_DMA_30BIT:
+               if ((u64)addr + buffersize > (1ULL << 30))
+                       return 1;
+               break;
+       case B43_DMA_32BIT:
+               if ((u64)addr + buffersize > (1ULL << 32))
+                       return 1;
+               break;
+       case B43_DMA_64BIT:
+               /* Currently we can't have addresses beyond
+                * 64bit in the kernel. */
+               break;
+       }
+
+       /* The address is OK. */
+       return 0;
+}
+
 static int setup_rx_descbuffer(struct b43_dmaring *ring,
                               struct b43_dmadesc_generic *desc,
                               struct b43_dmadesc_meta *meta, gfp_t gfp_flags)
@@ -555,7 +599,7 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
        if (unlikely(!skb))
                return -ENOMEM;
        dmaaddr = map_descbuffer(ring, skb->data, ring->rx_buffersize, 0);
-       if (dma_mapping_error(dmaaddr)) {
+       if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize)) {
                /* ugh. try to realloc in zone_dma */
                gfp_flags |= GFP_DMA;
 
@@ -568,7 +612,7 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
                                         ring->rx_buffersize, 0);
        }
 
-       if (dma_mapping_error(dmaaddr)) {
+       if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize)) {
                dev_kfree_skb_any(skb);
                return -EIO;
        }
@@ -633,7 +677,7 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
        u32 trans = ssb_dma_translation(ring->dev->dev);
 
        if (ring->tx) {
-               if (ring->dma64) {
+               if (ring->type == B43_DMA_64BIT) {
                        u64 ringbase = (u64) (ring->dmabase);
 
                        addrext = ((ringbase >> 32) & SSB_DMA_TRANSLATION_MASK)
@@ -647,7 +691,7 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
                        b43_dma_write(ring, B43_DMA64_TXRINGHI,
                                      ((ringbase >> 32) &
                                       ~SSB_DMA_TRANSLATION_MASK)
-                                     | trans);
+                                     | (trans << 1));
                } else {
                        u32 ringbase = (u32) (ring->dmabase);
 
@@ -665,7 +709,7 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
                err = alloc_initial_descbuffers(ring);
                if (err)
                        goto out;
-               if (ring->dma64) {
+               if (ring->type == B43_DMA_64BIT) {
                        u64 ringbase = (u64) (ring->dmabase);
 
                        addrext = ((ringbase >> 32) & SSB_DMA_TRANSLATION_MASK)
@@ -680,8 +724,9 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
                        b43_dma_write(ring, B43_DMA64_RXRINGHI,
                                      ((ringbase >> 32) &
                                       ~SSB_DMA_TRANSLATION_MASK)
-                                     | trans);
-                       b43_dma_write(ring, B43_DMA64_RXINDEX, 200);
+                                     | (trans << 1));
+                       b43_dma_write(ring, B43_DMA64_RXINDEX, ring->nr_slots *
+                                     sizeof(struct b43_dmadesc64));
                } else {
                        u32 ringbase = (u32) (ring->dmabase);
 
@@ -695,11 +740,12 @@ static int dmacontroller_setup(struct b43_dmaring *ring)
                        b43_dma_write(ring, B43_DMA32_RXRING,
                                      (ringbase & ~SSB_DMA_TRANSLATION_MASK)
                                      | trans);
-                       b43_dma_write(ring, B43_DMA32_RXINDEX, 200);
+                       b43_dma_write(ring, B43_DMA32_RXINDEX, ring->nr_slots *
+                                     sizeof(struct b43_dmadesc32));
                }
        }
 
-      out:
+out:
        return err;
 }
 
@@ -708,16 +754,16 @@ static void dmacontroller_cleanup(struct b43_dmaring *ring)
 {
        if (ring->tx) {
                b43_dmacontroller_tx_reset(ring->dev, ring->mmio_base,
-                                          ring->dma64);
-               if (ring->dma64) {
+                                          ring->type);
+               if (ring->type == B43_DMA_64BIT) {
                        b43_dma_write(ring, B43_DMA64_TXRINGLO, 0);
                        b43_dma_write(ring, B43_DMA64_TXRINGHI, 0);
                } else
                        b43_dma_write(ring, B43_DMA32_TXRING, 0);
        } else {
                b43_dmacontroller_rx_reset(ring->dev, ring->mmio_base,
-                                          ring->dma64);
-               if (ring->dma64) {
+                                          ring->type);
+               if (ring->type == B43_DMA_64BIT) {
                        b43_dma_write(ring, B43_DMA64_RXRINGLO, 0);
                        b43_dma_write(ring, B43_DMA64_RXRINGHI, 0);
                } else
@@ -772,7 +818,8 @@ static u64 supported_dma_mask(struct b43_wldev *dev)
 static
 struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
                                      int controller_index,
-                                     int for_tx, int dma64)
+                                     int for_tx,
+                                     enum b43_dmatype type)
 {
        struct b43_dmaring *ring;
        int err;
@@ -782,6 +829,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
        ring = kzalloc(sizeof(*ring), GFP_KERNEL);
        if (!ring)
                goto out;
+       ring->type = type;
 
        nr_slots = B43_RXRING_SLOTS;
        if (for_tx)
@@ -793,7 +841,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
                goto err_kfree_ring;
        if (for_tx) {
                ring->txhdr_cache = kcalloc(nr_slots,
-                                           sizeof(struct b43_txhdr_fw4),
+                                           b43_txhdr_size(dev),
                                            GFP_KERNEL);
                if (!ring->txhdr_cache)
                        goto err_kfree_meta;
@@ -801,39 +849,38 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
                /* test for ability to dma to txhdr_cache */
                dma_test = dma_map_single(dev->dev->dev,
                                          ring->txhdr_cache,
-                                         sizeof(struct b43_txhdr_fw4),
+                                         b43_txhdr_size(dev),
                                          DMA_TO_DEVICE);
 
-               if (dma_mapping_error(dma_test)) {
+               if (b43_dma_mapping_error(ring, dma_test, b43_txhdr_size(dev))) {
                        /* ugh realloc */
                        kfree(ring->txhdr_cache);
                        ring->txhdr_cache = kcalloc(nr_slots,
-                                                   sizeof(struct
-                                                          b43_txhdr_fw4),
+                                                   b43_txhdr_size(dev),
                                                    GFP_KERNEL | GFP_DMA);
                        if (!ring->txhdr_cache)
                                goto err_kfree_meta;
 
                        dma_test = dma_map_single(dev->dev->dev,
                                                  ring->txhdr_cache,
-                                                 sizeof(struct b43_txhdr_fw4),
+                                                 b43_txhdr_size(dev),
                                                  DMA_TO_DEVICE);
 
-                       if (dma_mapping_error(dma_test))
+                       if (b43_dma_mapping_error(ring, dma_test,
+                                                 b43_txhdr_size(dev)))
                                goto err_kfree_txhdr_cache;
                }
 
                dma_unmap_single(dev->dev->dev,
-                                dma_test, sizeof(struct b43_txhdr_fw4),
+                                dma_test, b43_txhdr_size(dev),
                                 DMA_TO_DEVICE);
        }
 
        ring->dev = dev;
        ring->nr_slots = nr_slots;
-       ring->mmio_base = b43_dmacontroller_base(dma64, controller_index);
+       ring->mmio_base = b43_dmacontroller_base(type, controller_index);
        ring->index = controller_index;
-       ring->dma64 = !!dma64;
-       if (dma64)
+       if (type == B43_DMA_64BIT)
                ring->ops = &dma64_ops;
        else
                ring->ops = &dma32_ops;
@@ -883,8 +930,8 @@ static void b43_destroy_dmaring(struct b43_dmaring *ring)
        if (!ring)
                return;
 
-       b43dbg(ring->dev->wl, "DMA-%s 0x%04X (%s) max used slots: %d/%d\n",
-              (ring->dma64) ? "64" : "32",
+       b43dbg(ring->dev->wl, "DMA-%u 0x%04X (%s) max used slots: %d/%d\n",
+              (unsigned int)(ring->type),
               ring->mmio_base,
               (ring->tx) ? "TX" : "RX", ring->max_used_slots, ring->nr_slots);
        /* Device IRQs are disabled prior entering this function,
@@ -901,11 +948,7 @@ static void b43_destroy_dmaring(struct b43_dmaring *ring)
 
 void b43_dma_free(struct b43_wldev *dev)
 {
-       struct b43_dma *dma;
-
-       if (b43_using_pio(dev))
-               return;
-       dma = &dev->dma;
+       struct b43_dma *dma = &dev->dma;
 
        b43_destroy_dmaring(dma->rx_ring3);
        dma->rx_ring3 = NULL;
@@ -932,74 +975,78 @@ int b43_dma_init(struct b43_wldev *dev)
        struct b43_dmaring *ring;
        int err;
        u64 dmamask;
-       int dma64 = 0;
+       enum b43_dmatype type;
 
        dmamask = supported_dma_mask(dev);
-       if (dmamask == DMA_64BIT_MASK)
-               dma64 = 1;
-
+       switch (dmamask) {
+       default:
+               B43_WARN_ON(1);
+       case DMA_30BIT_MASK:
+               type = B43_DMA_30BIT;
+               break;
+       case DMA_32BIT_MASK:
+               type = B43_DMA_32BIT;
+               break;
+       case DMA_64BIT_MASK:
+               type = B43_DMA_64BIT;
+               break;
+       }
        err = ssb_dma_set_mask(dev->dev, dmamask);
        if (err) {
-#ifdef B43_PIO
-               b43warn(dev->wl, "DMA for this device not supported. "
-                       "Falling back to PIO\n");
-               dev->__using_pio = 1;
-               return -EAGAIN;
-#else
-               b43err(dev->wl, "DMA for this device not supported and "
-                      "no PIO support compiled in\n");
+               b43err(dev->wl, "The machine/kernel does not support "
+                      "the required DMA mask (0x%08X%08X)\n",
+                      (unsigned int)((dmamask & 0xFFFFFFFF00000000ULL) >> 32),
+                      (unsigned int)(dmamask & 0x00000000FFFFFFFFULL));
                return -EOPNOTSUPP;
-#endif
        }
 
        err = -ENOMEM;
        /* setup TX DMA channels. */
-       ring = b43_setup_dmaring(dev, 0, 1, dma64);
+       ring = b43_setup_dmaring(dev, 0, 1, type);
        if (!ring)
                goto out;
        dma->tx_ring0 = ring;
 
-       ring = b43_setup_dmaring(dev, 1, 1, dma64);
+       ring = b43_setup_dmaring(dev, 1, 1, type);
        if (!ring)
                goto err_destroy_tx0;
        dma->tx_ring1 = ring;
 
-       ring = b43_setup_dmaring(dev, 2, 1, dma64);
+       ring = b43_setup_dmaring(dev, 2, 1, type);
        if (!ring)
                goto err_destroy_tx1;
        dma->tx_ring2 = ring;
 
-       ring = b43_setup_dmaring(dev, 3, 1, dma64);
+       ring = b43_setup_dmaring(dev, 3, 1, type);
        if (!ring)
                goto err_destroy_tx2;
        dma->tx_ring3 = ring;
 
-       ring = b43_setup_dmaring(dev, 4, 1, dma64);
+       ring = b43_setup_dmaring(dev, 4, 1, type);
        if (!ring)
                goto err_destroy_tx3;
        dma->tx_ring4 = ring;
 
-       ring = b43_setup_dmaring(dev, 5, 1, dma64);
+       ring = b43_setup_dmaring(dev, 5, 1, type);
        if (!ring)
                goto err_destroy_tx4;
        dma->tx_ring5 = ring;
 
        /* setup RX DMA channels. */
-       ring = b43_setup_dmaring(dev, 0, 0, dma64);
+       ring = b43_setup_dmaring(dev, 0, 0, type);
        if (!ring)
                goto err_destroy_tx5;
        dma->rx_ring0 = ring;
 
        if (dev->dev->id.revision < 5) {
-               ring = b43_setup_dmaring(dev, 3, 0, dma64);
+               ring = b43_setup_dmaring(dev, 3, 0, type);
                if (!ring)
                        goto err_destroy_rx0;
                dma->rx_ring3 = ring;
        }
 
-       b43dbg(dev->wl, "%d-bit DMA initialized\n",
-              (dmamask == DMA_64BIT_MASK) ? 64 :
-              (dmamask == DMA_32BIT_MASK) ? 32 : 30);
+       b43dbg(dev->wl, "%u-bit DMA initialized\n",
+              (unsigned int)type);
        err = 0;
       out:
        return err;
@@ -1038,26 +1085,30 @@ static u16 generate_cookie(struct b43_dmaring *ring, int slot)
         * in the lower 12 bits.
         * Note that the cookie must never be 0, as this
         * is a special value used in RX path.
+        * It can also not be 0xFFFF because that is special
+        * for multicast frames.
         */
        switch (ring->index) {
        case 0:
-               cookie = 0xA000;
+               cookie = 0x1000;
                break;
        case 1:
-               cookie = 0xB000;
+               cookie = 0x2000;
                break;
        case 2:
-               cookie = 0xC000;
+               cookie = 0x3000;
                break;
        case 3:
-               cookie = 0xD000;
+               cookie = 0x4000;
                break;
        case 4:
-               cookie = 0xE000;
+               cookie = 0x5000;
                break;
        case 5:
-               cookie = 0xF000;
+               cookie = 0x6000;
                break;
+       default:
+               B43_WARN_ON(1);
        }
        B43_WARN_ON(slot & ~0x0FFF);
        cookie |= (u16) slot;
@@ -1073,22 +1124,22 @@ struct b43_dmaring *parse_cookie(struct b43_wldev *dev, u16 cookie, int *slot)
        struct b43_dmaring *ring = NULL;
 
        switch (cookie & 0xF000) {
-       case 0xA000:
+       case 0x1000:
                ring = dma->tx_ring0;
                break;
-       case 0xB000:
+       case 0x2000:
                ring = dma->tx_ring1;
                break;
-       case 0xC000:
+       case 0x3000:
                ring = dma->tx_ring2;
                break;
-       case 0xD000:
+       case 0x4000:
                ring = dma->tx_ring3;
                break;
-       case 0xE000:
+       case 0x5000:
                ring = dma->tx_ring4;
                break;
-       case 0xF000:
+       case 0x6000:
                ring = dma->tx_ring5;
                break;
        default:
@@ -1106,32 +1157,45 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 {
        const struct b43_dma_ops *ops = ring->ops;
        u8 *header;
-       int slot;
+       int slot, old_top_slot, old_used_slots;
        int err;
        struct b43_dmadesc_generic *desc;
        struct b43_dmadesc_meta *meta;
        struct b43_dmadesc_meta *meta_hdr;
        struct sk_buff *bounce_skb;
+       u16 cookie;
+       size_t hdrsize = b43_txhdr_size(ring->dev);
 
 #define SLOTS_PER_PACKET  2
        B43_WARN_ON(skb_shinfo(skb)->nr_frags);
 
+       old_top_slot = ring->current_slot;
+       old_used_slots = ring->used_slots;
+
        /* Get a slot for the header. */
        slot = request_slot(ring);
        desc = ops->idx2desc(ring, slot, &meta_hdr);
        memset(meta_hdr, 0, sizeof(*meta_hdr));
 
-       header = &(ring->txhdr_cache[slot * sizeof(struct b43_txhdr_fw4)]);
-       b43_generate_txhdr(ring->dev, header,
-                          skb->data, skb->len, ctl,
-                          generate_cookie(ring, slot));
+       header = &(ring->txhdr_cache[slot * hdrsize]);
+       cookie = generate_cookie(ring, slot);
+       err = b43_generate_txhdr(ring->dev, header,
+                                skb->data, skb->len, ctl, cookie);
+       if (unlikely(err)) {
+               ring->current_slot = old_top_slot;
+               ring->used_slots = old_used_slots;
+               return err;
+       }
 
        meta_hdr->dmaaddr = map_descbuffer(ring, (unsigned char *)header,
-                                          sizeof(struct b43_txhdr_fw4), 1);
-       if (dma_mapping_error(meta_hdr->dmaaddr))
+                                          hdrsize, 1);
+       if (b43_dma_mapping_error(ring, meta_hdr->dmaaddr, hdrsize)) {
+               ring->current_slot = old_top_slot;
+               ring->used_slots = old_used_slots;
                return -EIO;
+       }
        ops->fill_descriptor(ring, desc, meta_hdr->dmaaddr,
-                            sizeof(struct b43_txhdr_fw4), 1, 0, 0);
+                            hdrsize, 1, 0, 0);
 
        /* Get a slot for the payload. */
        slot = request_slot(ring);
@@ -1144,9 +1208,11 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 
        meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
        /* create a bounce buffer in zone_dma on mapping failure. */
-       if (dma_mapping_error(meta->dmaaddr)) {
+       if (b43_dma_mapping_error(ring, meta->dmaaddr, skb->len)) {
                bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
                if (!bounce_skb) {
+                       ring->current_slot = old_top_slot;
+                       ring->used_slots = old_used_slots;
                        err = -ENOMEM;
                        goto out_unmap_hdr;
                }
@@ -1156,7 +1222,9 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
                skb = bounce_skb;
                meta->skb = skb;
                meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
-               if (dma_mapping_error(meta->dmaaddr)) {
+               if (b43_dma_mapping_error(ring, meta->dmaaddr, skb->len)) {
+                       ring->current_slot = old_top_slot;
+                       ring->used_slots = old_used_slots;
                        err = -EIO;
                        goto out_free_bounce;
                }
@@ -1164,16 +1232,22 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
 
        ops->fill_descriptor(ring, desc, meta->dmaaddr, skb->len, 0, 1, 1);
 
+       if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+               /* Tell the firmware about the cookie of the last
+                * mcast frame, so it can clear the more-data bit in it. */
+               b43_shm_write16(ring->dev, B43_SHM_SHARED,
+                               B43_SHM_SH_MCASTCOOKIE, cookie);
+       }
        /* Now transfer the whole frame. */
        wmb();
        ops->poke_tx(ring, next_slot(ring, slot));
        return 0;
 
-      out_free_bounce:
+out_free_bounce:
        dev_kfree_skb_any(skb);
-      out_unmap_hdr:
+out_unmap_hdr:
        unmap_descbuffer(ring, meta_hdr->dmaaddr,
-                        sizeof(struct b43_txhdr_fw4), 1);
+                        hdrsize, 1);
        return err;
 }
 
@@ -1202,10 +1276,27 @@ int b43_dma_tx(struct b43_wldev *dev,
               struct sk_buff *skb, struct ieee80211_tx_control *ctl)
 {
        struct b43_dmaring *ring;
+       struct ieee80211_hdr *hdr;
        int err = 0;
        unsigned long flags;
 
-       ring = priority_to_txring(dev, ctl->queue);
+       if (unlikely(skb->len < 2 + 2 + 6)) {
+               /* Too short, this can't be a valid frame. */
+               return -EINVAL;
+       }
+
+       hdr = (struct ieee80211_hdr *)skb->data;
+       if (ctl->flags & IEEE80211_TXCTL_SEND_AFTER_DTIM) {
+               /* The multicast ring will be sent after the DTIM */
+               ring = dev->dma.tx_ring4;
+               /* Set the more-data bit. Ucode will clear it on
+                * the last frame for us. */
+               hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+       } else {
+               /* Decide by priority where to put this frame. */
+               ring = priority_to_txring(dev, ctl->queue);
+       }
+
        spin_lock_irqsave(&ring->lock, flags);
        B43_WARN_ON(!ring->tx);
        if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
@@ -1219,6 +1310,13 @@ int b43_dma_tx(struct b43_wldev *dev,
        B43_WARN_ON(ring->stopped);
 
        err = dma_tx_fragment(ring, skb, ctl);
+       if (unlikely(err == -ENOKEY)) {
+               /* Drop this packet, as we don't have the encryption key
+                * anymore and must not transmit it unencrypted. */
+               dev_kfree_skb_any(skb);
+               err = 0;
+               goto out_unlock;
+       }
        if (unlikely(err)) {
                b43err(dev->wl, "DMA tx mapping failure\n");
                goto out_unlock;
@@ -1233,7 +1331,7 @@ int b43_dma_tx(struct b43_wldev *dev,
                        b43dbg(dev->wl, "Stopped TX ring %d\n", ring->index);
                }
        }
-      out_unlock:
+out_unlock:
        spin_unlock_irqrestore(&ring->lock, flags);
 
        return err;
@@ -1265,7 +1363,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
                                         1);
                else
                        unmap_descbuffer(ring, meta->dmaaddr,
-                                        sizeof(struct b43_txhdr_fw4), 1);
+                                        b43_txhdr_size(dev), 1);
 
                if (meta->is_last_fragment) {
                        B43_WARN_ON(!meta->skb);