ramips: enhance dma engine support
author Michael Lee <igvtee@gmail.com>
Wed, 2 Dec 2015 13:35:08 +0000 (21:35 +0800)
committer John Crispin <john@phrozen.org>
Mon, 13 Jun 2016 20:51:42 +0000 (22:51 +0200)
* fix a compiler error: the device_control operation is not supported
  anymore.
* add support for the old chips with 8 channels and the new chips with
  16 channels.
* add an mt7621 hsdma driver. the data sheet says it has two channels,
  but after testing only one channel is usable.
* add memory-to-memory DMA support, verified with the dmatest kernel
  module (a client-side usage sketch follows below). on rt305x the
  engine sometimes copies extra data, and on mt7621 only two channels
  can work at the same time. these two chips probably have hardware
  bugs, since the other chips do not show these problems.
* use a tasklet to handle the remaining dma requests.
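
For reference, here is a minimal client-side sketch of driving the new
DMA_MEMCPY capability through the generic dmaengine API, roughly the way
dmatest exercises it. This is not part of the patch: demo_memcpy is a
made-up helper, and dst/src are assumed to be DMA addresses the caller
has already mapped.

    #include <linux/dmaengine.h>

    /* hedged sketch, not part of this patch: push one memcpy through
     * whatever channel advertises DMA_MEMCPY (e.g. a gdma channel) */
    static int demo_memcpy(dma_addr_t dst, dma_addr_t src, size_t len)
    {
            struct dma_async_tx_descriptor *tx;
            struct dma_chan *chan;
            dma_cap_mask_t mask;
            dma_cookie_t cookie;
            int ret = 0;

            dma_cap_zero(mask);
            dma_cap_set(DMA_MEMCPY, mask);
            chan = dma_request_channel(mask, NULL, NULL);
            if (!chan)
                    return -ENODEV;

            tx = chan->device->device_prep_dma_memcpy(chan, dst, src,
                            len, DMA_PREP_INTERRUPT);
            if (!tx) {
                    ret = -ENOMEM;
                    goto out;
            }

            cookie = dmaengine_submit(tx);
            dma_async_issue_pending(chan);

            /* dma_sync_wait() busy-waits; real users set a callback */
            if (dma_sync_wait(chan, cookie) != DMA_COMPLETE)
                    ret = -EIO;
    out:
            dma_release_channel(chan);
            return ret;
    }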

Signed-off-by: Michael Lee <igvtee@gmail.com>
target/linux/ramips/patches-4.4/0047-DMA-ralink-add-rt2880-dma-engine.patch

index 3362d4b5fcbe18a27a31cee9eebc9edcede78835..d100a082e7722bd24441376557c36c571db08f47 100644 (file)
@@ -14,13 +14,19 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 
 --- a/drivers/dma/Kconfig
 +++ b/drivers/dma/Kconfig
-@@ -40,6 +40,12 @@ config ASYNC_TX_ENABLE_CHANNEL_SWITCH
+@@ -40,6 +40,18 @@ config ASYNC_TX_ENABLE_CHANNEL_SWITCH
  config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
        bool
  
 +config DMA_RALINK
 +      tristate "RALINK DMA support"
-+      depends on RALINK && SOC_MT7620
++      depends on RALINK && !SOC_RT288X
++      select DMA_ENGINE
++      select DMA_VIRTUAL_CHANNELS
++
++config MTK_HSDMA
++      tristate "MTK HSDMA support"
++      depends on RALINK && SOC_MT7621
 +      select DMA_ENGINE
 +      select DMA_VIRTUAL_CHANNELS
 +
@@ -29,16 +35,17 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
  
 --- a/drivers/dma/Makefile
 +++ b/drivers/dma/Makefile
-@@ -65,5 +65,6 @@ obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-
+@@ -65,5 +65,7 @@ obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-
  obj-$(CONFIG_TI_EDMA) += edma.o
  obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
  obj-$(CONFIG_ZX_DMA) += zx296702_dma.o
 +obj-$(CONFIG_DMA_RALINK) += ralink-gdma.o
++obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
  
  obj-y += xilinx/
 --- /dev/null
 +++ b/drivers/dma/ralink-gdma.c
-@@ -0,0 +1,577 @@
+@@ -0,0 +1,928 @@
 +/*
 + *  Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de>
 + *  GDMA4740 DMAC support
@@ -48,10 +55,6 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 + *  Free Software Foundation;  either version 2 of the License, or (at your
 + *  option) any later version.
 + *
-+ *  You should have received a copy of the GNU General Public License along
-+ *  with this program; if not, write to the Free Software Foundation, Inc.,
-+ *  675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
 + */
 +
 +#include <linux/dmaengine.h>
@@ -65,11 +68,11 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#include <linux/spinlock.h>
 +#include <linux/irq.h>
 +#include <linux/of_dma.h>
++#include <linux/reset.h>
++#include <linux/of_device.h>
 +
 +#include "virt-dma.h"
 +
-+#define GDMA_NR_CHANS                 16
-+
 +#define GDMA_REG_SRC_ADDR(x)          (0x00 + (x) * 0x10)
 +#define GDMA_REG_DST_ADDR(x)          (0x04 + (x) * 0x10)
 +
@@ -84,7 +87,7 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#define GDMA_REG_CTRL0_BURST_SHIFT    3
 +#define       GDMA_REG_CTRL0_DONE_INT         BIT(2)
 +#define       GDMA_REG_CTRL0_ENABLE           BIT(1)
-+#define       GDMA_REG_CTRL0_HW_MODE          0
++#define GDMA_REG_CTRL0_SW_MODE          BIT(0)
 +
 +#define GDMA_REG_CTRL1(x)             (0x0c + (x) * 0x10)
 +#define GDMA_REG_CTRL1_SEG_MASK               0xf
@@ -109,16 +112,39 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#define GDMA_REG_GCT_VER_SHIFT                1
 +#define GDMA_REG_GCT_ARBIT_RR         BIT(0)
 +
++#define GDMA_REG_REQSTS                       0x2a0
++#define GDMA_REG_ACKSTS                       0x2a4
++#define GDMA_REG_FINSTS                       0x2a8
++
++/* for RT305X gdma registers */
++#define GDMA_RT305X_CTRL0_REQ_MASK    0xf
++#define GDMA_RT305X_CTRL0_SRC_REQ_SHIFT       12
++#define GDMA_RT305X_CTRL0_DST_REQ_SHIFT       8
++
++#define GDMA_RT305X_CTRL1_FAIL                BIT(4)
++#define GDMA_RT305X_CTRL1_NEXT_MASK   0x7
++#define GDMA_RT305X_CTRL1_NEXT_SHIFT  1
++
++#define GDMA_RT305X_STATUS_INT                0x80
++#define GDMA_RT305X_STATUS_SIGNAL     0x84
++#define GDMA_RT305X_GCT                       0x88
++
++/* for MT7621 gdma registers */
++#define GDMA_REG_PERF_START(x)                (0x230 + (x) * 0x8)
++#define GDMA_REG_PERF_END(x)          (0x234 + (x) * 0x8)
++
 +enum gdma_dma_transfer_size {
 +      GDMA_TRANSFER_SIZE_4BYTE        = 0,
 +      GDMA_TRANSFER_SIZE_8BYTE        = 1,
 +      GDMA_TRANSFER_SIZE_16BYTE       = 2,
 +      GDMA_TRANSFER_SIZE_32BYTE       = 3,
++      GDMA_TRANSFER_SIZE_64BYTE       = 4,
 +};
 +
 +struct gdma_dma_sg {
-+      dma_addr_t addr;
-+      unsigned int len;
++      dma_addr_t src_addr;
++      dma_addr_t dst_addr;
++      u32 len;
 +};
 +
 +struct gdma_dma_desc {
@@ -127,6 +153,7 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      enum dma_transfer_direction direction;
 +      bool cyclic;
 +
++      u32 residue;
 +      unsigned int num_sgs;
 +      struct gdma_dma_sg sg[];
 +};
@@ -134,9 +161,10 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +struct gdma_dmaengine_chan {
 +      struct virt_dma_chan vchan;
 +      unsigned int id;
++      unsigned int slave_id;
 +
 +      dma_addr_t fifo_addr;
-+      unsigned int transfer_shift;
++      enum gdma_dma_transfer_size burst_size;
 +
 +      struct gdma_dma_desc *desc;
 +      unsigned int next_sg;
@@ -144,10 +172,22 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +
 +struct gdma_dma_dev {
 +      struct dma_device ddev;
++      struct device_dma_parameters dma_parms;
++      struct gdma_data *data;
 +      void __iomem *base;
-+      struct clk *clk;
++      struct tasklet_struct task;
++      volatile unsigned long chan_issued;
++      atomic_t cnt;
++
++      struct gdma_dmaengine_chan chan[];
++};
 +
-+      struct gdma_dmaengine_chan chan[GDMA_NR_CHANS];
++struct gdma_data
++{
++      int chancnt;
++      u32 done_int_reg;
++      void (*init)(struct gdma_dma_dev *dma_dev);
++      int (*start_transfer)(struct gdma_dmaengine_chan *chan);
 +};
 +
 +static struct gdma_dma_dev *gdma_dma_chan_get_dev(
@@ -176,21 +216,9 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static inline void gdma_dma_write(struct gdma_dma_dev *dma_dev,
 +      unsigned reg, uint32_t val)
 +{
-+      //printk("gdma --> %p = 0x%08X\n", dma_dev->base + reg, val);
 +      writel(val, dma_dev->base + reg);
 +}
 +
-+static inline void gdma_dma_write_mask(struct gdma_dma_dev *dma_dev,
-+      unsigned int reg, uint32_t val, uint32_t mask)
-+{
-+      uint32_t tmp;
-+
-+      tmp = gdma_dma_read(dma_dev, reg);
-+      tmp &= ~mask;
-+      tmp |= val;
-+      gdma_dma_write(dma_dev, reg, tmp);
-+}
-+
 +static struct gdma_dma_desc *gdma_dma_alloc_desc(unsigned int num_sgs)
 +{
 +      return kzalloc(sizeof(struct gdma_dma_desc) +
@@ -199,58 +227,54 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +
 +static enum gdma_dma_transfer_size gdma_dma_maxburst(u32 maxburst)
 +{
-+      if (maxburst <= 7)
++      if (maxburst < 2)
 +              return GDMA_TRANSFER_SIZE_4BYTE;
-+      else if (maxburst <= 15)
++      else if (maxburst < 4)
 +              return GDMA_TRANSFER_SIZE_8BYTE;
-+      else if (maxburst <= 31)
++      else if (maxburst < 8)
 +              return GDMA_TRANSFER_SIZE_16BYTE;
-+
-+      return GDMA_TRANSFER_SIZE_32BYTE;
++      else if (maxburst < 16)
++              return GDMA_TRANSFER_SIZE_32BYTE;
++      else
++              return GDMA_TRANSFER_SIZE_64BYTE;
 +}
 +
-+static int gdma_dma_slave_config(struct dma_chan *c,
-+      const struct dma_slave_config *config)
++static int gdma_dma_config(struct dma_chan *c,
++              struct dma_slave_config *config)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      enum gdma_dma_transfer_size transfer_size;
-+      uint32_t flags;
-+      uint32_t ctrl0, ctrl1;
++
++      if (config->device_fc) {
++              dev_err(dma_dev->ddev.dev, "flow controller is not supported\n");
++              return -EINVAL;
++      }
 +
 +      switch (config->direction) {
 +      case DMA_MEM_TO_DEV:
-+              ctrl1 = 32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT;
-+              ctrl1 |= config->slave_id << GDMA_REG_CTRL1_DST_REQ_SHIFT;
-+              flags = GDMA_REG_CTRL0_DST_ADDR_FIXED;
-+              transfer_size = gdma_dma_maxburst(config->dst_maxburst);
++              if (config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) {
++                      dev_err(dma_dev->ddev.dev, "only 4 byte buswidth is supported\n");
++                      return -EINVAL;
++              }
++              chan->slave_id = config->slave_id;
 +              chan->fifo_addr = config->dst_addr;
++              chan->burst_size = gdma_dma_maxburst(config->dst_maxburst);
 +              break;
-+
 +      case DMA_DEV_TO_MEM:
-+              ctrl1 = config->slave_id << GDMA_REG_CTRL1_SRC_REQ_SHIFT;
-+              ctrl1 |= 32 << GDMA_REG_CTRL1_DST_REQ_SHIFT;
-+              flags = GDMA_REG_CTRL0_SRC_ADDR_FIXED;
-+              transfer_size = gdma_dma_maxburst(config->src_maxburst);
++              if (config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) {
++                      dev_err(dma_dev->ddev.dev, "only 4 byte buswidth is supported\n");
++                      return -EINVAL;
++              }
++              chan->slave_id = config->slave_id;
 +              chan->fifo_addr = config->src_addr;
++              chan->burst_size = gdma_dma_maxburst(config->src_maxburst);
 +              break;
-+
 +      default:
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              config->direction);
 +              return -EINVAL;
 +      }
 +
-+      chan->transfer_shift = 1 + transfer_size;
-+
-+      ctrl0 = flags | GDMA_REG_CTRL0_HW_MODE;
-+      ctrl0 |= GDMA_REG_CTRL0_DONE_INT;
-+
-+      ctrl1 &= ~(GDMA_REG_CTRL1_NEXT_MASK << GDMA_REG_CTRL1_NEXT_SHIFT);
-+      ctrl1 |= chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
-+      ctrl1 |= GDMA_REG_CTRL1_FAIL;
-+      ctrl1 &= ~GDMA_REG_CTRL1_CONTINOUS;
-+      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
-+      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
-+
 +      return 0;
 +}
 +
@@ -258,108 +282,271 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      unsigned long flags;
++      unsigned long flags, timeout;
 +      LIST_HEAD(head);
++      int i = 0;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id), 0,
-+                      GDMA_REG_CTRL0_ENABLE);
 +      chan->desc = NULL;
++      clear_bit(chan->id, &dma_dev->chan_issued);
 +      vchan_get_all_descriptors(&chan->vchan, &head);
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +
 +      vchan_dma_desc_free_list(&chan->vchan, &head);
 +
++      /* wait for the dma transfer to complete */
++      timeout = jiffies + msecs_to_jiffies(5000);
++      while (gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id)) &
++                      GDMA_REG_CTRL0_ENABLE) {
++              if (time_after_eq(jiffies, timeout)) {
++                      dev_err(dma_dev->ddev.dev, "chan %d wait timeout\n",
++                                      chan->id);
++                      /* restore to init value */
++                      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), 0);
++                      break;
++              }
++              cpu_relax();
++              i++;
++      }
++
++      if (i)
++              dev_dbg(dma_dev->ddev.dev, "terminate chan %d loops %d\n",
++                              chan->id, i);
++
 +      return 0;
 +}
 +
-+static int gdma_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
-+      unsigned long arg)
++static void rt305x_dump_reg(struct gdma_dma_dev *dma_dev, int id)
 +{
-+      struct dma_slave_config *config = (struct dma_slave_config *)arg;
-+
-+      switch (cmd) {
-+      case DMA_SLAVE_CONFIG:
-+              return gdma_dma_slave_config(chan, config);
-+      case DMA_TERMINATE_ALL:
-+              return gdma_dma_terminate_all(chan);
-+      default:
-+              return -ENOSYS;
-+      }
++      dev_dbg(dma_dev->ddev.dev, "chan %d, src %08x, dst %08x, ctr0 %08x, " \
++                      "ctr1 %08x, intr %08x, signal %08x\n", id,
++                      gdma_dma_read(dma_dev, GDMA_REG_SRC_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_DST_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL0(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL1(id)),
++                      gdma_dma_read(dma_dev, GDMA_RT305X_STATUS_INT),
++                      gdma_dma_read(dma_dev, GDMA_RT305X_STATUS_SIGNAL));
 +}
 +
-+static int gdma_dma_start_transfer(struct gdma_dmaengine_chan *chan)
++static int rt305x_gdma_start_transfer(struct gdma_dmaengine_chan *chan)
 +{
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
 +      dma_addr_t src_addr, dst_addr;
-+      struct virt_dma_desc *vdesc;
 +      struct gdma_dma_sg *sg;
++      uint32_t ctrl0, ctrl1;
 +
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id), 0,
-+                      GDMA_REG_CTRL0_ENABLE);
++      /* verify the chan is stopped */
++      ctrl0 = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
++      if (unlikely(ctrl0 & GDMA_REG_CTRL0_ENABLE)) {
++              dev_err(dma_dev->ddev.dev, "chan %d is already started (%08x)\n",
++                              chan->id, ctrl0);
++              rt305x_dump_reg(dma_dev, chan->id);
++              return -EINVAL;
++      }
 +
-+      if (!chan->desc) {
-+              vdesc = vchan_next_desc(&chan->vchan);
-+              if (!vdesc)
-+                      return 0;
-+              chan->desc = to_gdma_dma_desc(vdesc);
-+              chan->next_sg = 0;
++      sg = &chan->desc->sg[chan->next_sg];
++      if (chan->desc->direction == DMA_MEM_TO_DEV) {
++              src_addr = sg->src_addr;
++              dst_addr = chan->fifo_addr;
++              ctrl0 = GDMA_REG_CTRL0_DST_ADDR_FIXED | \
++                      (8 << GDMA_RT305X_CTRL0_SRC_REQ_SHIFT) | \
++                      (chan->slave_id << GDMA_RT305X_CTRL0_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_DEV_TO_MEM) {
++              src_addr = chan->fifo_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SRC_ADDR_FIXED | \
++                      (chan->slave_id << GDMA_RT305X_CTRL0_SRC_REQ_SHIFT) | \
++                      (8 << GDMA_RT305X_CTRL0_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_MEM_TO_MEM) {
++              /*
++               * TODO: the memcpy function has bugs. it sometimes copies
++               * 8 extra bytes of data when verifying with dmatest.
++               */
++              src_addr = sg->src_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SW_MODE | \
++                      (8 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (8 << GDMA_REG_CTRL1_DST_REQ_SHIFT);
++      } else {
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              chan->desc->direction);
++              return -EINVAL;
 +      }
 +
-+      if (chan->next_sg == chan->desc->num_sgs)
-+              chan->next_sg = 0;
++      ctrl0 |= (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | \
++               (chan->burst_size << GDMA_REG_CTRL0_BURST_SHIFT) | \
++               GDMA_REG_CTRL0_DONE_INT | GDMA_REG_CTRL0_ENABLE;
++      ctrl1 = chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
 +
-+      sg = &chan->desc->sg[chan->next_sg];
++      chan->next_sg++;
++      gdma_dma_write(dma_dev, GDMA_REG_SRC_ADDR(chan->id), src_addr);
++      gdma_dma_write(dma_dev, GDMA_REG_DST_ADDR(chan->id), dst_addr);
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
++
++      /* make sure next_sg is updated */
++      wmb();
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
 +
++      return 0;
++}
++
++static void rt3883_dump_reg(struct gdma_dma_dev *dma_dev, int id)
++{
++      dev_dbg(dma_dev->ddev.dev, "chan %d, src %08x, dst %08x, ctr0 %08x, " \
++                      "ctr1 %08x, unmask %08x, done %08x, " \
++                      "req %08x, ack %08x, fin %08x\n", id,
++                      gdma_dma_read(dma_dev, GDMA_REG_SRC_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_DST_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL0(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL1(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_UNMASK_INT),
++                      gdma_dma_read(dma_dev, GDMA_REG_DONE_INT),
++                      gdma_dma_read(dma_dev, GDMA_REG_REQSTS),
++                      gdma_dma_read(dma_dev, GDMA_REG_ACKSTS),
++                      gdma_dma_read(dma_dev, GDMA_REG_FINSTS));
++}
++
++static int rt3883_gdma_start_transfer(struct gdma_dmaengine_chan *chan)
++{
++      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
++      dma_addr_t src_addr, dst_addr;
++      struct gdma_dma_sg *sg;
++      uint32_t ctrl0, ctrl1;
++
++      /* verify the chan is stopped */
++      ctrl0 = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
++      if (unlikely(ctrl0 & GDMA_REG_CTRL0_ENABLE)) {
++              dev_err(dma_dev->ddev.dev, "chan %d is already started (%08x)\n",
++                              chan->id, ctrl0);
++              rt3883_dump_reg(dma_dev, chan->id);
++              return -EINVAL;
++      }
++
++      sg = &chan->desc->sg[chan->next_sg];
 +      if (chan->desc->direction == DMA_MEM_TO_DEV) {
-+              src_addr = sg->addr;
++              src_addr = sg->src_addr;
 +              dst_addr = chan->fifo_addr;
-+      } else {
++              ctrl0 = GDMA_REG_CTRL0_DST_ADDR_FIXED;
++              ctrl1 = (32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (chan->slave_id << GDMA_REG_CTRL1_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_DEV_TO_MEM) {
 +              src_addr = chan->fifo_addr;
-+              dst_addr = sg->addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SRC_ADDR_FIXED;
++              ctrl1 = (chan->slave_id << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (32 << GDMA_REG_CTRL1_DST_REQ_SHIFT) | \
++                      GDMA_REG_CTRL1_COHERENT;
++      } else if (chan->desc->direction == DMA_MEM_TO_MEM) {
++              src_addr = sg->src_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SW_MODE;
++              ctrl1 = (32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (32 << GDMA_REG_CTRL1_DST_REQ_SHIFT) | \
++                      GDMA_REG_CTRL1_COHERENT;
++      } else {
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              chan->desc->direction);
++              return -EINVAL;
 +      }
++
++      ctrl0 |= (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | \
++               (chan->burst_size << GDMA_REG_CTRL0_BURST_SHIFT) | \
++               GDMA_REG_CTRL0_DONE_INT | GDMA_REG_CTRL0_ENABLE;
++      ctrl1 |= chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
++
++      chan->next_sg++;
 +      gdma_dma_write(dma_dev, GDMA_REG_SRC_ADDR(chan->id), src_addr);
 +      gdma_dma_write(dma_dev, GDMA_REG_DST_ADDR(chan->id), dst_addr);
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id),
-+                      (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | GDMA_REG_CTRL0_ENABLE,
-+                      GDMA_REG_CTRL0_TX_MASK << GDMA_REG_CTRL0_TX_SHIFT);
-+      chan->next_sg++;
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL1(chan->id), 0, GDMA_REG_CTRL1_MASK);
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
++
++      /* make sure next_sg is updated */
++      wmb();
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
 +
 +      return 0;
 +}
 +
-+static void gdma_dma_chan_irq(struct gdma_dmaengine_chan *chan)
++static inline int gdma_start_transfer(struct gdma_dma_dev *dma_dev,
++              struct gdma_dmaengine_chan *chan)
++{
++      return dma_dev->data->start_transfer(chan);
++}
++
++static int gdma_next_desc(struct gdma_dmaengine_chan *chan)
++{
++      struct virt_dma_desc *vdesc;
++
++      vdesc = vchan_next_desc(&chan->vchan);
++      if (!vdesc) {
++              chan->desc = NULL;
++              return 0;
++      }
++      chan->desc = to_gdma_dma_desc(vdesc);
++      chan->next_sg = 0;
++
++      return 1;
++}
++
++static void gdma_dma_chan_irq(struct gdma_dma_dev *dma_dev,
++              struct gdma_dmaengine_chan *chan)
 +{
-+      spin_lock(&chan->vchan.lock);
-+      if (chan->desc) {
-+              if (chan->desc && chan->desc->cyclic) {
-+                      vchan_cyclic_callback(&chan->desc->vdesc);
++      struct gdma_dma_desc *desc;
++      unsigned long flags;
++      int chan_issued;
++
++      chan_issued = 0;
++      spin_lock_irqsave(&chan->vchan.lock, flags);
++      desc = chan->desc;
++      if (desc) {
++              if (desc->cyclic) {
++                      vchan_cyclic_callback(&desc->vdesc);
++                      if (chan->next_sg == desc->num_sgs)
++                              chan->next_sg = 0;
++                      chan_issued = 1;
 +              } else {
-+                      if (chan->next_sg == chan->desc->num_sgs) {
-+                              chan->desc = NULL;
-+                              vchan_cookie_complete(&chan->desc->vdesc);
-+                      }
++                      desc->residue -= desc->sg[chan->next_sg - 1].len;
++                      if (chan->next_sg == desc->num_sgs) {
++                              list_del(&desc->vdesc.node);
++                              vchan_cookie_complete(&desc->vdesc);
++                              chan_issued = gdma_next_desc(chan);
++                      } else
++                              chan_issued = 1;
 +              }
-+      }
-+      gdma_dma_start_transfer(chan);
-+      spin_unlock(&chan->vchan.lock);
++      } else
++              dev_dbg(dma_dev->ddev.dev, "chan %d no desc to complete\n",
++                              chan->id);
++      if (chan_issued)
++              set_bit(chan->id, &dma_dev->chan_issued);
++      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +}
 +
 +static irqreturn_t gdma_dma_irq(int irq, void *devid)
 +{
 +      struct gdma_dma_dev *dma_dev = devid;
-+      uint32_t unmask, done;
++      u32 done, done_reg;
 +      unsigned int i;
 +
-+      unmask = gdma_dma_read(dma_dev, GDMA_REG_UNMASK_INT);
-+      gdma_dma_write(dma_dev, GDMA_REG_UNMASK_INT, unmask);
-+      done = gdma_dma_read(dma_dev, GDMA_REG_DONE_INT);
++      done_reg = dma_dev->data->done_int_reg;
++      done = gdma_dma_read(dma_dev, done_reg);
++      if (unlikely(!done))
++              return IRQ_NONE;
++
++      /* clear the done bits */
++      gdma_dma_write(dma_dev, done_reg, done);
 +
-+      for (i = 0; i < GDMA_NR_CHANS; ++i)
-+              if (done & BIT(i))
-+                      gdma_dma_chan_irq(&dma_dev->chan[i]);
-+      gdma_dma_write(dma_dev, GDMA_REG_DONE_INT, done);
++      i = 0;
++      while (done) {
++              if (done & 0x1) {
++                      gdma_dma_chan_irq(dma_dev, &dma_dev->chan[i]);
++                      atomic_dec(&dma_dev->cnt);
++              }
++              done >>= 1;
++              i++;
++      }
++
++      /* schedule the tasklet only if there is work to do */
++      if (dma_dev->chan_issued)
++              tasklet_schedule(&dma_dev->task);
 +
 +      return IRQ_HANDLED;
 +}
@@ -367,18 +554,25 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static void gdma_dma_issue_pending(struct dma_chan *c)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
++      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      if (vchan_issue_pending(&chan->vchan) && !chan->desc)
-+              gdma_dma_start_transfer(chan);
++      if (vchan_issue_pending(&chan->vchan) && !chan->desc) {
++              if (gdma_next_desc(chan)) {
++                      set_bit(chan->id, &dma_dev->chan_issued);
++                      tasklet_schedule(&dma_dev->task);
++              } else
++                      dev_dbg(dma_dev->ddev.dev, "chan %d no desc to issue\n",
++                                      chan->id);
++      }
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +}
 +
 +static struct dma_async_tx_descriptor *gdma_dma_prep_slave_sg(
-+      struct dma_chan *c, struct scatterlist *sgl,
-+      unsigned int sg_len, enum dma_transfer_direction direction,
-+      unsigned long flags, void *context)
++              struct dma_chan *c, struct scatterlist *sgl,
++              unsigned int sg_len, enum dma_transfer_direction direction,
++              unsigned long flags, void *context)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_desc *desc;
@@ -386,12 +580,30 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      unsigned int i;
 +
 +      desc = gdma_dma_alloc_desc(sg_len);
-+      if (!desc)
++      if (!desc) {
++              dev_err(c->device->dev, "alloc sg descs error\n");
 +              return NULL;
++      }
++      desc->residue = 0;
 +
 +      for_each_sg(sgl, sg, sg_len, i) {
-+              desc->sg[i].addr = sg_dma_address(sg);
++              if (direction == DMA_MEM_TO_DEV)
++                      desc->sg[i].src_addr = sg_dma_address(sg);
++              else if (direction == DMA_DEV_TO_MEM)
++                      desc->sg[i].dst_addr = sg_dma_address(sg);
++              else {
++                      dev_err(c->device->dev, "direction type %d error\n",
++                                      direction);
++                      goto free_desc;
++              }
++
++              if (unlikely(sg_dma_len(sg) > GDMA_REG_CTRL0_TX_MASK)) {
++                      dev_err(c->device->dev, "sg len too large %d\n",
++                                      sg_dma_len(sg));
++                      goto free_desc;
++              }
 +              desc->sg[i].len = sg_dma_len(sg);
++              desc->residue += sg_dma_len(sg);
 +      }
 +
 +      desc->num_sgs = sg_len;
@@ -399,12 +611,60 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      desc->cyclic = false;
 +
 +      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
++
++free_desc:
++      kfree(desc);
++      return NULL;
++}
++
++static struct dma_async_tx_descriptor *gdma_dma_prep_dma_memcpy(
++              struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
++              size_t len, unsigned long flags)
++{
++      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
++      struct gdma_dma_desc *desc;
++      unsigned int num_periods, i;
++      size_t xfer_count;
++
++      if (len <= 0)
++              return NULL;
++
++      chan->burst_size = gdma_dma_maxburst(len >> 2);
++
++      xfer_count = GDMA_REG_CTRL0_TX_MASK;
++      num_periods = DIV_ROUND_UP(len, xfer_count);
++
++      desc = gdma_dma_alloc_desc(num_periods);
++      if (!desc) {
++              dev_err(c->device->dev, "alloc memcpy descs error\n");
++              return NULL;
++      }
++      desc->residue = len;
++
++      for (i = 0; i < num_periods; i++) {
++              desc->sg[i].src_addr = src;
++              desc->sg[i].dst_addr = dest;
++              if (len > xfer_count) {
++                      desc->sg[i].len = xfer_count;
++              } else {
++                      desc->sg[i].len = len;
++              }
++              src += desc->sg[i].len;
++              dest += desc->sg[i].len;
++              len -= desc->sg[i].len;
++      }
++
++      desc->num_sgs = num_periods;
++      desc->direction = DMA_MEM_TO_MEM;
++      desc->cyclic = false;
++
++      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
 +}
 +
 +static struct dma_async_tx_descriptor *gdma_dma_prep_dma_cyclic(
 +      struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
 +      size_t period_len, enum dma_transfer_direction direction,
-+      unsigned long flags, void *context)
++      unsigned long flags)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_desc *desc;
@@ -413,14 +673,30 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      if (buf_len % period_len)
 +              return NULL;
 +
-+      num_periods = buf_len / period_len;
++      if (period_len > GDMA_REG_CTRL0_TX_MASK) {
++              dev_err(c->device->dev, "cyclic len too large %d\n",
++                              period_len);
++              return NULL;
++      }
 +
++      num_periods = buf_len / period_len;
 +      desc = gdma_dma_alloc_desc(num_periods);
-+      if (!desc)
++      if (!desc) {
++              dev_err(c->device->dev, "alloc cyclic descs error\n");
 +              return NULL;
++      }
++      desc->residue = buf_len;
 +
 +      for (i = 0; i < num_periods; i++) {
-+              desc->sg[i].addr = buf_addr;
++              if (direction == DMA_MEM_TO_DEV)
++                      desc->sg[i].src_addr = buf_addr;
++              else if (direction == DMA_DEV_TO_MEM)
++                      desc->sg[i].dst_addr = buf_addr;
++              else {
++                      dev_err(c->device->dev, "direction type %d error\n",
++                                      direction);
++                      goto free_desc;
++              }
 +              desc->sg[i].len = period_len;
 +              buf_addr += period_len;
 +      }
@@ -430,28 +706,10 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      desc->cyclic = true;
 +
 +      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
-+}
 +
-+static size_t gdma_dma_desc_residue(struct gdma_dmaengine_chan *chan,
-+      struct gdma_dma_desc *desc, unsigned int next_sg)
-+{
-+      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      unsigned int residue, count;
-+      unsigned int i;
-+
-+      residue = 0;
-+
-+      for (i = next_sg; i < desc->num_sgs; i++)
-+              residue += desc->sg[i].len;
-+
-+      if (next_sg != 0) {
-+              count = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
-+              count >>= GDMA_REG_CTRL0_CURR_SHIFT;
-+              count &= GDMA_REG_CTRL0_CURR_MASK;
-+              residue += count << chan->transfer_shift;
-+      }
-+
-+      return residue;
++free_desc:
++      kfree(desc);
++      return NULL;
 +}
 +
 +static enum dma_status gdma_dma_tx_status(struct dma_chan *c,
@@ -461,30 +719,32 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      struct virt_dma_desc *vdesc;
 +      enum dma_status status;
 +      unsigned long flags;
++      struct gdma_dma_desc *desc;
 +
 +      status = dma_cookie_status(c, cookie, state);
-+      if (status == DMA_SUCCESS || !state)
++      if (status == DMA_COMPLETE || !state)
 +              return status;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      vdesc = vchan_find_desc(&chan->vchan, cookie);
-+      if (cookie == chan->desc->vdesc.tx.cookie) {
-+              state->residue = gdma_dma_desc_residue(chan, chan->desc,
-+                              chan->next_sg);
-+      } else if (vdesc) {
-+              state->residue = gdma_dma_desc_residue(chan,
-+                              to_gdma_dma_desc(vdesc), 0);
-+      } else {
-+              state->residue = 0;
-+      }
++      desc = chan->desc;
++      if (desc && (cookie == desc->vdesc.tx.cookie)) {
++              /*
++               * We never update desc->residue in the cyclic case, so we
++               * report the remaining room to the end of the circular
++               * buffer instead.
++               */
++              if (desc->cyclic)
++                      state->residue = desc->residue -
++                              ((chan->next_sg - 1) * desc->sg[0].len);
++              else
++                      state->residue = desc->residue;
++      } else if ((vdesc = vchan_find_desc(&chan->vchan, cookie)))
++              state->residue = to_gdma_dma_desc(vdesc)->residue;
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +
-+      return status;
-+}
++      dev_dbg(c->device->dev, "tx residue %d bytes\n", state->residue);
 +
-+static int gdma_dma_alloc_chan_resources(struct dma_chan *c)
-+{
-+      return 0;
++      return status;
 +}
 +
 +static void gdma_dma_free_chan_resources(struct dma_chan *c)
@@ -497,87 +757,192 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      kfree(container_of(vdesc, struct gdma_dma_desc, vdesc));
 +}
 +
-+static struct dma_chan *
-+of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
-+                      struct of_dma *ofdma)
++static void gdma_dma_tasklet(unsigned long arg)
 +{
-+      struct gdma_dma_dev *dma_dev = ofdma->of_dma_data;
-+      unsigned int request = dma_spec->args[0];
++      struct gdma_dma_dev *dma_dev = (struct gdma_dma_dev *)arg;
++      struct gdma_dmaengine_chan *chan;
++      static unsigned int last_chan;
++      unsigned int i, chan_mask;
++
++      /* record last chan to round robin all chans */
++      i = last_chan;
++      chan_mask = dma_dev->data->chancnt - 1;
++      do {
++              /*
++               * on mt7621, when verifying with dmatest with all
++               * channels enabled, we need to limit it so that only
++               * two channels work at the same time. otherwise the
++               * data gets corrupted.
++               */
++              if (atomic_read(&dma_dev->cnt) >= 2) {
++                      last_chan = i;
++                      break;
++              }
 +
-+      if (request >= GDMA_NR_CHANS)
-+              return NULL;
++              if (test_and_clear_bit(i, &dma_dev->chan_issued)) {
++                      chan = &dma_dev->chan[i];
++                      if (chan->desc) {
++                              atomic_inc(&dma_dev->cnt);
++                              gdma_start_transfer(dma_dev, chan);
++                      } else
++                              dev_dbg(dma_dev->ddev.dev, "chan %d no desc to issue\n", chan->id);
++
++                      if (!dma_dev->chan_issued)
++                              break;
++              }
++
++              i = (i + 1) & chan_mask;
++      } while (i != last_chan);
++}
++
++static void rt305x_gdma_init(struct gdma_dma_dev *dma_dev)
++{
++      uint32_t gct;
++
++      /* all chans round robin */
++      gdma_dma_write(dma_dev, GDMA_RT305X_GCT, GDMA_REG_GCT_ARBIT_RR);
 +
-+      return dma_get_slave_channel(&(dma_dev->chan[request].vchan.chan));
++      gct = gdma_dma_read(dma_dev, GDMA_RT305X_GCT);
++      dev_info(dma_dev->ddev.dev, "revision: %d, channels: %d\n",
++                      (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
++                      8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) &
++                              GDMA_REG_GCT_CHAN_MASK));
 +}
 +
++static void rt3883_gdma_init(struct gdma_dma_dev *dma_dev)
++{
++      uint32_t gct;
++
++      /* all chans round robin */
++      gdma_dma_write(dma_dev, GDMA_REG_GCT, GDMA_REG_GCT_ARBIT_RR);
++
++      gct = gdma_dma_read(dma_dev, GDMA_REG_GCT);
++      dev_info(dma_dev->ddev.dev, "revision: %d, channels: %d\n",
++                      (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
++                      8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) &
++                              GDMA_REG_GCT_CHAN_MASK));
++}
++
++static struct gdma_data rt305x_gdma_data = {
++      .chancnt = 8,
++      .done_int_reg = GDMA_RT305X_STATUS_INT,
++      .init = rt305x_gdma_init,
++      .start_transfer = rt305x_gdma_start_transfer,
++};
++
++static struct gdma_data rt3883_gdma_data = {
++      .chancnt = 16,
++      .done_int_reg = GDMA_REG_DONE_INT,
++      .init = rt3883_gdma_init,
++      .start_transfer = rt3883_gdma_start_transfer,
++};
++
++static const struct of_device_id gdma_of_match_table[] = {
++      { .compatible = "ralink,rt305x-gdma", .data = &rt305x_gdma_data },
++      { .compatible = "ralink,rt3883-gdma", .data = &rt3883_gdma_data },
++      { },
++};
++
 +static int gdma_dma_probe(struct platform_device *pdev)
 +{
++      const struct of_device_id *match;
 +      struct gdma_dmaengine_chan *chan;
 +      struct gdma_dma_dev *dma_dev;
 +      struct dma_device *dd;
 +      unsigned int i;
 +      struct resource *res;
-+      uint32_t gct;
 +      int ret;
 +      int irq;
++      void __iomem *base;
++      struct gdma_data *data;
 +
++      ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
++      if (ret)
++              return ret;
 +
-+      dma_dev = devm_kzalloc(&pdev->dev, sizeof(*dma_dev), GFP_KERNEL);
-+      if (!dma_dev)
++      match = of_match_device(gdma_of_match_table, &pdev->dev);
++      if (!match)
 +              return -EINVAL;
++      data = (struct gdma_data *) match->data;
 +
-+      dd = &dma_dev->ddev;
++      dma_dev = devm_kzalloc(&pdev->dev, sizeof(*dma_dev) +
++                      (sizeof(struct gdma_dmaengine_chan) * data->chancnt),
++                      GFP_KERNEL);
++      if (!dma_dev) {
++              dev_err(&pdev->dev, "alloc dma device failed\n");
++              return -EINVAL;
++      }
++      dma_dev->data = data;
 +
 +      res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-+      dma_dev->base = devm_ioremap_resource(&pdev->dev, res);
-+      if (IS_ERR(dma_dev->base))
-+              return PTR_ERR(dma_dev->base);
++      base = devm_ioremap_resource(&pdev->dev, res);
++      if (IS_ERR(base))
++              return PTR_ERR(base);
++      dma_dev->base = base;
++      tasklet_init(&dma_dev->task, gdma_dma_tasklet, (unsigned long)dma_dev);
++
++      irq = platform_get_irq(pdev, 0);
++      if (irq < 0) {
++              dev_err(&pdev->dev, "failed to get irq\n");
++              return -EINVAL;
++      }
++      ret = devm_request_irq(&pdev->dev, irq, gdma_dma_irq,
++                      0, dev_name(&pdev->dev), dma_dev);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to request irq\n");
++              return ret;
++      }
++
++      device_reset(&pdev->dev);
 +
++      dd = &dma_dev->ddev;
++      dma_cap_set(DMA_MEMCPY, dd->cap_mask);
 +      dma_cap_set(DMA_SLAVE, dd->cap_mask);
 +      dma_cap_set(DMA_CYCLIC, dd->cap_mask);
-+      dd->device_alloc_chan_resources = gdma_dma_alloc_chan_resources;
 +      dd->device_free_chan_resources = gdma_dma_free_chan_resources;
-+      dd->device_tx_status = gdma_dma_tx_status;
-+      dd->device_issue_pending = gdma_dma_issue_pending;
++      dd->device_prep_dma_memcpy = gdma_dma_prep_dma_memcpy;
 +      dd->device_prep_slave_sg = gdma_dma_prep_slave_sg;
 +      dd->device_prep_dma_cyclic = gdma_dma_prep_dma_cyclic;
-+      dd->device_control = gdma_dma_control;
++      dd->device_config = gdma_dma_config;
++      dd->device_terminate_all = gdma_dma_terminate_all;
++      dd->device_tx_status = gdma_dma_tx_status;
++      dd->device_issue_pending = gdma_dma_issue_pending;
++
++      dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
++      dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
++      dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
++      dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
++
 +      dd->dev = &pdev->dev;
-+      dd->chancnt = GDMA_NR_CHANS;
++      dd->dev->dma_parms = &dma_dev->dma_parms;
++      dma_set_max_seg_size(dd->dev, GDMA_REG_CTRL0_TX_MASK);
 +      INIT_LIST_HEAD(&dd->channels);
 +
-+      for (i = 0; i < dd->chancnt; i++) {
++      for (i = 0; i < data->chancnt; i++) {
 +              chan = &dma_dev->chan[i];
 +              chan->id = i;
 +              chan->vchan.desc_free = gdma_dma_desc_free;
 +              vchan_init(&chan->vchan, dd);
 +      }
 +
++      /* init hardware */
++      data->init(dma_dev);
++
 +      ret = dma_async_device_register(dd);
-+      if (ret)
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register dma device\n");
 +              return ret;
++      }
 +
 +      ret = of_dma_controller_register(pdev->dev.of_node,
 +              of_dma_xlate_by_chan_id, dma_dev);
-+      if (ret)
-+              goto err_unregister;
-+
-+      irq = platform_get_irq(pdev, 0);
-+      ret = request_irq(irq, gdma_dma_irq, 0, dev_name(&pdev->dev), dma_dev);
-+      if (ret)
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register of dma controller\n");
 +              goto err_unregister;
++      }
 +
-+      gdma_dma_write(dma_dev, GDMA_REG_UNMASK_INT, 0);
-+      gdma_dma_write(dma_dev, GDMA_REG_DONE_INT, BIT(dd->chancnt) - 1);
-+
-+      gct = gdma_dma_read(dma_dev, GDMA_REG_GCT);
-+      dev_info(&pdev->dev, "revision: %d, channels: %d\n",
-+              (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
-+              8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) & GDMA_REG_GCT_CHAN_MASK));
 +      platform_set_drvdata(pdev, dma_dev);
 +
-+      gdma_dma_write(dma_dev, GDMA_REG_GCT, GDMA_REG_GCT_ARBIT_RR);
-+
 +      return 0;
 +
 +err_unregister:
@@ -588,34 +953,27 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static int gdma_dma_remove(struct platform_device *pdev)
 +{
 +      struct gdma_dma_dev *dma_dev = platform_get_drvdata(pdev);
-+      int irq = platform_get_irq(pdev, 0);
 +
-+      free_irq(irq, dma_dev);
++      tasklet_kill(&dma_dev->task);
 +        of_dma_controller_free(pdev->dev.of_node);
 +      dma_async_device_unregister(&dma_dev->ddev);
 +
 +      return 0;
 +}
 +
-+static const struct of_device_id gdma_of_match_table[] = {
-+      { .compatible = "ralink,rt2880-gdma" },
-+      { },
-+};
-+
 +static struct platform_driver gdma_dma_driver = {
 +      .probe = gdma_dma_probe,
 +      .remove = gdma_dma_remove,
 +      .driver = {
 +              .name = "gdma-rt2880",
-+              .owner = THIS_MODULE,
 +              .of_match_table = gdma_of_match_table,
 +      },
 +};
 +module_platform_driver(gdma_dma_driver);
 +
 +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-+MODULE_DESCRIPTION("GDMA4740 DMA driver");
-+MODULE_LICENSE("GPLv2");
++MODULE_DESCRIPTION("Ralink/MTK DMA driver");
++MODULE_LICENSE("GPL v2");
 --- a/include/linux/dmaengine.h
 +++ b/include/linux/dmaengine.h
 @@ -496,6 +496,7 @@ static inline void dma_set_unmap(struct
@@ -626,3 +984,773 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
  #else
  static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
                                 struct dmaengine_unmap_data *unmap)
+--- /dev/null
++++ b/drivers/dma/mtk-hsdma.c
+@@ -0,0 +1,767 @@
++/*
++ *  Copyright (C) 2015, Michael Lee <igvtee@gmail.com>
++ *  MTK HSDMA support
++ *
++ *  This program is free software; you can redistribute it and/or modify it
++ *  under  the terms of the GNU General        Public License as published by the
++ *  Free Software Foundation;  either version 2 of the License, or (at your
++ *  option) any later version.
++ *
++ */
++
++#include <linux/dmaengine.h>
++#include <linux/dma-mapping.h>
++#include <linux/err.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/platform_device.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/irq.h>
++#include <linux/of_dma.h>
++#include <linux/reset.h>
++#include <linux/of_device.h>
++
++#include "virt-dma.h"
++
++#define HSDMA_BASE_OFFSET             0x800
++
++#define HSDMA_REG_TX_BASE             0x00
++#define HSDMA_REG_TX_CNT              0x04
++#define HSDMA_REG_TX_CTX              0x08
++#define HSDMA_REG_TX_DTX              0x0c
++#define HSDMA_REG_RX_BASE             0x100
++#define HSDMA_REG_RX_CNT              0x104
++#define HSDMA_REG_RX_CRX              0x108
++#define HSDMA_REG_RX_DRX              0x10c
++#define HSDMA_REG_INFO                        0x200
++#define HSDMA_REG_GLO_CFG             0x204
++#define HSDMA_REG_RST_CFG             0x208
++#define HSDMA_REG_DELAY_INT           0x20c
++#define HSDMA_REG_FREEQ_THRES         0x210
++#define HSDMA_REG_INT_STATUS          0x220
++#define HSDMA_REG_INT_MASK            0x228
++#define HSDMA_REG_SCH_Q01             0x280
++#define HSDMA_REG_SCH_Q23             0x284
++
++#define HSDMA_DESCS_MAX                       0xfff
++#define HSDMA_DESCS_NUM                       8
++#define HSDMA_DESCS_MASK              (HSDMA_DESCS_NUM - 1)
++#define HSDMA_NEXT_DESC(x)            (((x) + 1) & HSDMA_DESCS_MASK)
++
++/* HSDMA_REG_INFO */
++#define HSDMA_INFO_INDEX_MASK         0xf
++#define HSDMA_INFO_INDEX_SHIFT                24
++#define HSDMA_INFO_BASE_MASK          0xff
++#define HSDMA_INFO_BASE_SHIFT         16
++#define HSDMA_INFO_RX_MASK            0xff
++#define HSDMA_INFO_RX_SHIFT           8
++#define HSDMA_INFO_TX_MASK            0xff
++#define HSDMA_INFO_TX_SHIFT           0
++
++/* HSDMA_REG_GLO_CFG */
++#define HSDMA_GLO_TX_2B_OFFSET                BIT(31)
++#define HSDMA_GLO_CLK_GATE            BIT(30)
++#define HSDMA_GLO_BYTE_SWAP           BIT(29)
++#define HSDMA_GLO_MULTI_DMA           BIT(10)
++#define HSDMA_GLO_TWO_BUF             BIT(9)
++#define HSDMA_GLO_32B_DESC            BIT(8)
++#define HSDMA_GLO_BIG_ENDIAN          BIT(7)
++#define HSDMA_GLO_TX_DONE             BIT(6)
++#define HSDMA_GLO_BT_MASK             0x3
++#define HSDMA_GLO_BT_SHIFT            4
++#define HSDMA_GLO_RX_BUSY             BIT(3)
++#define HSDMA_GLO_RX_DMA              BIT(2)
++#define HSDMA_GLO_TX_BUSY             BIT(1)
++#define HSDMA_GLO_TX_DMA              BIT(0)
++
++#define HSDMA_BT_SIZE_16BYTES         (0 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_32BYTES         (1 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_64BYTES         (2 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_128BYTES                (3 << HSDMA_GLO_BT_SHIFT)
++
++#define HSDMA_GLO_DEFAULT             (HSDMA_GLO_MULTI_DMA | \
++              HSDMA_GLO_RX_DMA | HSDMA_GLO_TX_DMA | HSDMA_BT_SIZE_32BYTES)
++
++/* HSDMA_REG_RST_CFG */
++#define HSDMA_RST_RX_SHIFT            16
++#define HSDMA_RST_TX_SHIFT            0
++
++/* HSDMA_REG_DELAY_INT */
++#define HSDMA_DELAY_INT_EN            BIT(15)
++#define HSDMA_DELAY_PEND_OFFSET               8
++#define HSDMA_DELAY_TIME_OFFSET               0
++#define HSDMA_DELAY_TX_OFFSET         16
++#define HSDMA_DELAY_RX_OFFSET         0
++
++#define HSDMA_DELAY_INIT(x)           (HSDMA_DELAY_INT_EN | \
++              ((x) << HSDMA_DELAY_PEND_OFFSET))
++#define HSDMA_DELAY(x)                        ((HSDMA_DELAY_INIT(x) << \
++              HSDMA_DELAY_TX_OFFSET) | HSDMA_DELAY_INIT(x))
++
++/* HSDMA_REG_INT_STATUS */
++#define HSDMA_INT_DELAY_RX_COH                BIT(31)
++#define HSDMA_INT_DELAY_RX_INT                BIT(30)
++#define HSDMA_INT_DELAY_TX_COH                BIT(29)
++#define HSDMA_INT_DELAY_TX_INT                BIT(28)
++#define HSDMA_INT_RX_MASK             0x3
++#define HSDMA_INT_RX_SHIFT            16
++#define HSDMA_INT_RX_Q0                       BIT(16)
++#define HSDMA_INT_TX_MASK             0xf
++#define HSDMA_INT_TX_SHIFT            0
++#define HSDMA_INT_TX_Q0                       BIT(0)
++
++/* tx/rx dma desc flags */
++#define HSDMA_PLEN_MASK                       0x3fff
++#define HSDMA_DESC_DONE                       BIT(31)
++#define HSDMA_DESC_LS0                        BIT(30)
++#define HSDMA_DESC_PLEN0(_x)          (((_x) & HSDMA_PLEN_MASK) << 16)
++#define HSDMA_DESC_TAG                        BIT(15)
++#define HSDMA_DESC_LS1                        BIT(14)
++#define HSDMA_DESC_PLEN1(_x)          ((_x) & HSDMA_PLEN_MASK)
++
++/* align to 4 bytes */
++#define HSDMA_ALIGN_SIZE              3
++/* align size to 128 bytes */
++#define HSDMA_MAX_PLEN                        0x3f80
++
++struct hsdma_desc {
++      u32 addr0;
++      u32 flags;
++      u32 addr1;
++      u32 unused;
++};
++
++struct mtk_hsdma_sg {
++      dma_addr_t src_addr;
++      dma_addr_t dst_addr;
++      u32 len;
++};
++
++struct mtk_hsdma_desc {
++      struct virt_dma_desc vdesc;
++      unsigned int num_sgs;
++      struct mtk_hsdma_sg sg[1];
++};
++
++struct mtk_hsdma_chan {
++      struct virt_dma_chan vchan;
++      unsigned int id;
++      dma_addr_t desc_addr;
++      int tx_idx;
++      int rx_idx;
++      struct hsdma_desc *tx_ring;
++      struct hsdma_desc *rx_ring;
++      struct mtk_hsdma_desc *desc;
++      unsigned int next_sg;
++};
++
++struct mtk_hsdam_engine {
++      struct dma_device ddev;
++      struct device_dma_parameters dma_parms;
++      void __iomem *base;
++      struct tasklet_struct task;
++      volatile unsigned long chan_issued;
++
++      struct mtk_hsdma_chan chan[1];
++};
++
++static inline struct mtk_hsdam_engine *mtk_hsdma_chan_get_dev(
++              struct mtk_hsdma_chan *chan)
++{
++      return container_of(chan->vchan.chan.device, struct mtk_hsdam_engine,
++                      ddev);
++}
++
++static inline struct mtk_hsdma_chan *to_mtk_hsdma_chan(struct dma_chan *c)
++{
++      return container_of(c, struct mtk_hsdma_chan, vchan.chan);
++}
++
++static inline struct mtk_hsdma_desc *to_mtk_hsdma_desc(
++              struct virt_dma_desc *vdesc)
++{
++      return container_of(vdesc, struct mtk_hsdma_desc, vdesc);
++}
++
++static inline u32 mtk_hsdma_read(struct mtk_hsdam_engine *hsdma, u32 reg)
++{
++      return readl(hsdma->base + reg);
++}
++
++static inline void mtk_hsdma_write(struct mtk_hsdam_engine *hsdma,
++              unsigned reg, u32 val)
++{
++      writel(val, hsdma->base + reg);
++}
++
++static void mtk_hsdma_reset_chan(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      chan->tx_idx = 0;
++      chan->rx_idx = HSDMA_DESCS_NUM - 1;
++
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CTX, chan->tx_idx);
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CRX, chan->rx_idx);
++
++      mtk_hsdma_write(hsdma, HSDMA_REG_RST_CFG,
++                      0x1 << (chan->id + HSDMA_RST_TX_SHIFT));
++      mtk_hsdma_write(hsdma, HSDMA_REG_RST_CFG,
++                      0x1 << (chan->id + HSDMA_RST_RX_SHIFT));
++}
++
++static void hsdma_dump_reg(struct mtk_hsdam_engine *hsdma)
++{
++      dev_dbg(hsdma->ddev.dev, "tbase %08x, tcnt %08x, " \
++                      "tctx %08x, tdtx: %08x, rbase %08x, " \
++                      "rcnt %08x, rctx %08x, rdtx %08x\n",
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_BASE),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_CNT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_CTX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_DTX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_BASE),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_CNT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_CRX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_DRX));
++
++      dev_dbg(hsdma->ddev.dev, "info %08x, glo %08x, delay %08x, " \
++                      "intr_stat %08x, intr_mask %08x\n",
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INFO),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_GLO_CFG),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_DELAY_INT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INT_STATUS),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INT_MASK));
++}
++
++static void hsdma_dump_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct hsdma_desc *tx_desc;
++      struct hsdma_desc *rx_desc;
++      int i;
++
++      dev_dbg(hsdma->ddev.dev, "tx idx: %d, rx idx: %d\n",
++                      chan->tx_idx, chan->rx_idx);
++
++      for (i = 0; i < HSDMA_DESCS_NUM; i++) {
++              tx_desc = &chan->tx_ring[i];
++              rx_desc = &chan->rx_ring[i];
++
++              dev_dbg(hsdma->ddev.dev, "%d tx addr0: %08x, flags %08x, " \
++                              "tx addr1: %08x, rx addr0 %08x, flags %08x\n",
++                              i, tx_desc->addr0, tx_desc->flags, \
++                              tx_desc->addr1, rx_desc->addr0, rx_desc->flags);
++      }
++}
++
++static void mtk_hsdma_reset(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      int i;
++
++      /* disable dma */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, 0);
++
++      /* disable intr */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, 0);
++
++      /* init desc value */
++      for (i = 0; i < HSDMA_DESCS_NUM; i++) {
++              chan->tx_ring[i].addr0 = 0;
++              chan->tx_ring[i].flags = HSDMA_DESC_LS0 |
++                      HSDMA_DESC_DONE;
++      }
++      for (i = 0; i < HSDMA_DESCS_NUM; i++) {
++              chan->rx_ring[i].addr0 = 0;
++              chan->rx_ring[i].flags = 0;
++      }
++
++      /* reset */
++      mtk_hsdma_reset_chan(hsdma, chan);
++
++      /* enable intr */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, HSDMA_INT_RX_Q0);
++
++      /* enable dma */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, HSDMA_GLO_DEFAULT);
++}
++
++static int mtk_hsdma_terminate_all(struct dma_chan *c)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdam_engine *hsdma = mtk_hsdma_chan_get_dev(chan);
++      unsigned long timeout;
++      LIST_HEAD(head);
++
++      spin_lock_bh(&chan->vchan.lock);
++      chan->desc = NULL;
++      clear_bit(chan->id, &hsdma->chan_issued);
++      vchan_get_all_descriptors(&chan->vchan, &head);
++      spin_unlock_bh(&chan->vchan.lock);
++
++      vchan_dma_desc_free_list(&chan->vchan, &head);
++
++      /* wait for the dma transfer to complete */
++      timeout = jiffies + msecs_to_jiffies(2000);
++      while (mtk_hsdma_read(hsdma, HSDMA_REG_GLO_CFG) &
++                      (HSDMA_GLO_RX_BUSY | HSDMA_GLO_TX_BUSY)) {
++              if (time_after_eq(jiffies, timeout)) {
++                      hsdma_dump_desc(hsdma, chan);
++                      mtk_hsdma_reset(hsdma, chan);
++                      dev_err(hsdma->ddev.dev, "timeout, reset it\n");
++                      break;
++              }
++              cpu_relax();
++      }
++
++      return 0;
++}
++
++static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      dma_addr_t src, dst;
++      size_t len, tlen;
++      struct hsdma_desc *tx_desc, *rx_desc;
++      struct mtk_hsdma_sg *sg;
++      unsigned int i;
++      int rx_idx;
++
++      sg = &chan->desc->sg[0];
++      len = sg->len;
++      chan->desc->num_sgs = DIV_ROUND_UP(len, HSDMA_MAX_PLEN);
++
++      /* tx desc */
++      src = sg->src_addr;
++      for (i = 0; i < chan->desc->num_sgs; i++) {
++              if (len > HSDMA_MAX_PLEN)
++                      tlen = HSDMA_MAX_PLEN;
++              else
++                      tlen = len;
++
++              if (i & 0x1) {
++                      tx_desc->addr1 = src;
++                      tx_desc->flags |= HSDMA_DESC_PLEN1(tlen);
++              } else {
++                      tx_desc = &chan->tx_ring[chan->tx_idx];
++                      tx_desc->addr0 = src;
++                      tx_desc->flags = HSDMA_DESC_PLEN0(tlen);
++
++                      /* update index */
++                      chan->tx_idx = HSDMA_NEXT_DESC(chan->tx_idx);
++              }
++
++              src += tlen;
++              len -= tlen;
++      }
++      if (i & 0x1)
++              tx_desc->flags |= HSDMA_DESC_LS0;
++      else
++              tx_desc->flags |= HSDMA_DESC_LS1;
++
++      /* rx desc */
++      rx_idx = HSDMA_NEXT_DESC(chan->rx_idx);
++      len = sg->len;
++      dst = sg->dst_addr;
++      for (i = 0; i < chan->desc->num_sgs; i++) {
++              rx_desc = &chan->rx_ring[rx_idx];
++              if (len > HSDMA_MAX_PLEN)
++                      tlen = HSDMA_MAX_PLEN;
++              else
++                      tlen = len;
++
++              rx_desc->addr0 = dst;
++              rx_desc->flags = HSDMA_DESC_PLEN0(tlen);
++
++              dst += tlen;
++              len -= tlen;
++
++              /* update index */
++              rx_idx = HSDMA_NEXT_DESC(rx_idx);
++      }
++
++      /* make sure the desc and index are all up to date */
++      wmb();
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CTX, chan->tx_idx);
++
++      return 0;
++}
++
++static int gdma_next_desc(struct mtk_hsdma_chan *chan)
++{
++      struct virt_dma_desc *vdesc;
++
++      vdesc = vchan_next_desc(&chan->vchan);
++      if (!vdesc) {
++              chan->desc = NULL;
++              return 0;
++      }
++      chan->desc = to_mtk_hsdma_desc(vdesc);
++      chan->next_sg = 0;
++
++      return 1;
++}
++
++static void mtk_hsdma_chan_done(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct mtk_hsdma_desc *desc;
++      int chan_issued;
++
++      chan_issued = 0;
++      spin_lock_bh(&chan->vchan.lock);
++      desc = chan->desc;
++      if (likely(desc)) {
++              if (chan->next_sg == desc->num_sgs) {
++                      list_del(&desc->vdesc.node);
++                      vchan_cookie_complete(&desc->vdesc);
++                      chan_issued = mtk_hsdma_next_desc(chan);
++              }
++      } else {
++              dev_dbg(hsdma->ddev.dev, "no desc to complete\n");
++      }
++
++      if (chan_issued)
++              set_bit(chan->id, &hsdma->chan_issued);
++      spin_unlock_bh(&chan->vchan.lock);
++}
++
++static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
++{
++      struct mtk_hsdam_engine *hsdma = devid;
++      u32 status;
++
++      status = mtk_hsdma_read(hsdma, HSDMA_REG_INT_STATUS);
++      if (unlikely(!status))
++              return IRQ_NONE;
++
++      if (likely(status & HSDMA_INT_RX_Q0)) {
++              tasklet_schedule(&hsdma->task);
++      } else {
++              dev_dbg(hsdma->ddev.dev, "unhandled irq status %08x\n",
++                              status);
++      }
++      /* clear the serviced interrupt bits */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_STATUS, status);
++
++      return IRQ_HANDLED;
++}
++
++static void mtk_hsdma_issue_pending(struct dma_chan *c)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdam_engine *hsdma = mtk_hsdma_chan_get_dev(chan);
++
++      spin_lock_bh(&chan->vchan.lock);
++      if (vchan_issue_pending(&chan->vchan) && !chan->desc) {
++              if (mtk_hsdma_next_desc(chan)) {
++                      set_bit(chan->id, &hsdma->chan_issued);
++                      tasklet_schedule(&hsdma->task);
++              } else {
++                      dev_dbg(hsdma->ddev.dev, "no desc to issue\n");
++              }
++      }
++      spin_unlock_bh(&chan->vchan.lock);
++}
++
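++/*
++ * Rough sketch of how a client drives this channel through the generic
++ * dmaengine API (capability mask setup and error handling omitted):
++ *
++ *   chan = dma_request_channel(mask, NULL, NULL);
++ *   tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len, 0);
++ *   cookie = dmaengine_submit(tx);
++ *   dma_async_issue_pending(chan);
++ */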
++static struct dma_async_tx_descriptor *mtk_hsdma_prep_dma_memcpy(
++              struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
++              size_t len, unsigned long flags)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdma_desc *desc;
++
++      if (!len)
++              return NULL;
++
++      desc = kzalloc(sizeof(*desc), GFP_ATOMIC);
++      if (!desc) {
++              dev_err(c->device->dev, "alloc memcpy desc failed\n");
++              return NULL;
++      }
++
++      desc->sg[0].src_addr = src;
++      desc->sg[0].dst_addr = dest;
++      desc->sg[0].len = len;
++
++      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
++}
++
++static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
++              dma_cookie_t cookie, struct dma_tx_state *state)
++{
++      return dma_cookie_status(c, cookie, state);
++}
++
++static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
++{
++      vchan_free_chan_resources(to_virt_chan(c));
++}
++
++static void mtk_hsdma_desc_free(struct virt_dma_desc *vdesc)
++{
++      kfree(container_of(vdesc, struct mtk_hsdma_desc, vdesc));
++}
++
++static void mtk_hsdma_tx(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan;
++
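++      /* only channel 0 is serviced, so only bit 0 is ever set */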
++      if (test_and_clear_bit(0, &hsdma->chan_issued)) {
++              chan = &hsdma->chan[0];
++              if (chan->desc) {
++                      mtk_hsdma_start_transfer(hsdma, chan);
++              } else {
++                      dev_dbg(hsdma->ddev.dev, "chan 0 no desc to issue\n");
++              }
++      }
++}
++
++static void mtk_hsdma_rx(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan;
++      int next_idx, drx_idx, cnt;
++
++      chan = &hsdma->chan[0];
++      next_idx = HSDMA_NEXT_DESC(chan->rx_idx);
++      drx_idx = mtk_hsdma_read(hsdma, HSDMA_REG_RX_DRX);
++
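++      /*
++       * DRX is the index the engine will fill next; the masked
++       * difference counts descriptors completed since the last pass.
++       */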
++      cnt = (drx_idx - next_idx) & HSDMA_DESCS_MASK;
++      if (!cnt)
++              return;
++
++      chan->next_sg += cnt;
++      chan->rx_idx = (chan->rx_idx + cnt) & HSDMA_DESCS_MASK;
++
++      /* update rx crx */
++      wmb();
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CRX, chan->rx_idx);
++
++      mtk_hsdma_chan_done(hsdma, chan);
++}
++
++static void mtk_hsdma_tasklet(unsigned long arg)
++{
++      struct mtk_hsdam_engine *hsdma = (struct mtk_hsdam_engine *)arg;
++
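++      /* reap completed rx descriptors, then launch any queued transfer */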
++      mtk_hsdma_rx(hsdma);
++      mtk_hsdma_tx(hsdma);
++}
++
++static int mtk_hsdma_alloc_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      int i;
++
++      chan->tx_ring = dma_alloc_coherent(hsdma->ddev.dev,
++                      2 * HSDMA_DESCS_NUM * sizeof(*chan->tx_ring),
++                      &chan->desc_addr, GFP_ATOMIC | __GFP_ZERO);
++      if (!chan->tx_ring)
++              return -ENOMEM;
++
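++      /* the rx ring occupies the second half of the same coherent block */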
++      chan->rx_ring = &chan->tx_ring[HSDMA_DESCS_NUM];
++
++      /* mark every tx descriptor done so the ring starts out empty */
++      for (i = 0; i < HSDMA_DESCS_NUM; i++)
++              chan->tx_ring[i].flags = HSDMA_DESC_LS0 | HSDMA_DESC_DONE;
++
++      return 0;
++}
++
++static void mtk_hsdma_free_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      if (chan->tx_ring) {
++              dma_free_coherent(hsdma->ddev.dev,
++                              2 * HSDMA_DESCS_NUM * sizeof(*chan->tx_ring),
++                              chan->tx_ring, chan->desc_addr);
++              chan->tx_ring = NULL;
++              chan->rx_ring = NULL;
++      }
++}
++
++static int mtk_hsdma_init(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan;
++      int ret;
++      u32 reg;
++
++      /* init desc */
++      chan = &hsdma->chan[0];
++      ret = mtk_hsdma_alloc_desc(hsdma, chan);
++      if (ret)
++              return ret;
++
++      /* tx */
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_BASE, chan->desc_addr);
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CNT, HSDMA_DESCS_NUM);
++      /* rx */
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_BASE, chan->desc_addr +
++                      (sizeof(struct hsdma_desc) * HSDMA_DESCS_NUM));
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CNT, HSDMA_DESCS_NUM);
++      /* reset */
++      mtk_hsdma_reset_chan(hsdma, chan);
++
++      /* enable rx intr */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, HSDMA_INT_RX_Q0);
++
++      /* enable dma */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, HSDMA_GLO_DEFAULT);
++
++      /* hardware info */
++      reg = mtk_hsdma_read(hsdma, HSDMA_REG_INFO);
++      dev_info(hsdma->ddev.dev, "rx: %d, tx: %d\n",
++                      (reg >> HSDMA_INFO_RX_SHIFT) & HSDMA_INFO_RX_MASK,
++                      (reg >> HSDMA_INFO_TX_SHIFT) & HSDMA_INFO_TX_MASK);
++
++      hsdma_dump_reg(hsdma);
++
++      return 0;
++}
++
++static void mtk_hsdma_uninit(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan;
++
++      /* disable dma */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, 0);
++
++      /* disable intr */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, 0);
++
++      /* free desc */
++      chan = &hsdma->chan[0];
++      mtk_hsdma_free_desc(hsdma, chan);
++
++      /* tx */
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_BASE, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CNT, 0);
++      /* rx */
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_BASE, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CNT, 0);
++      /* reset */
++      mtk_hsdma_reset_chan(hsdma, chan);
++}
++
++static const struct of_device_id mtk_hsdma_of_match[] = {
++      { .compatible = "mediatek,mt7621-hsdma" },
++      { },
++};
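++MODULE_DEVICE_TABLE(of, mtk_hsdma_of_match);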
++
++static int mtk_hsdma_probe(struct platform_device *pdev)
++{
++      const struct of_device_id *match;
++      struct mtk_hsdma_chan *chan;
++      struct mtk_hsdam_engine *hsdma;
++      struct dma_device *dd;
++      struct resource *res;
++      int ret;
++      int irq;
++      void __iomem *base;
++
++      ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
++      if (ret)
++              return ret;
++
++      match = of_match_device(mtk_hsdma_of_match, &pdev->dev);
++      if (!match)
++              return -EINVAL;
++
++      hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
++      if (!hsdma)
++              return -ENOMEM;
++
++      res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++      base = devm_ioremap_resource(&pdev->dev, res);
++      if (IS_ERR(base))
++              return PTR_ERR(base);
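++      /* the hsdma block sits at a fixed offset inside the mapped range */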
++      hsdma->base = base + HSDMA_BASE_OFFSET;
++      tasklet_init(&hsdma->task, mtk_hsdma_tasklet, (unsigned long)hsdma);
++
++      irq = platform_get_irq(pdev, 0);
++      if (irq < 0) {
++              dev_err(&pdev->dev, "failed to get irq\n");
++              return irq;
++      }
++      ret = devm_request_irq(&pdev->dev, irq, mtk_hsdma_irq,
++                      0, dev_name(&pdev->dev), hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to request irq\n");
++              return ret;
++      }
++
++      device_reset(&pdev->dev);
++
++      dd = &hsdma->ddev;
++      dma_cap_set(DMA_MEMCPY, dd->cap_mask);
++      dd->copy_align = HSDMA_ALIGN_SIZE;
++      dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
++      dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
++      dd->device_terminate_all = mtk_hsdma_terminate_all;
++      dd->device_tx_status = mtk_hsdma_tx_status;
++      dd->device_issue_pending = mtk_hsdma_issue_pending;
++      dd->dev = &pdev->dev;
++      dd->dev->dma_parms = &hsdma->dma_parms;
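++      /* advertise the largest length one descriptor slot can carry */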
++      dma_set_max_seg_size(dd->dev, HSDMA_MAX_PLEN);
++      INIT_LIST_HEAD(&dd->channels);
++
++      chan = &hsdma->chan[0];
++      chan->id = 0;
++      chan->vchan.desc_free = mtk_hsdma_desc_free;
++      vchan_init(&chan->vchan, dd);
++
++      /* init hardware */
++      ret = mtk_hsdma_init(hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to alloc ring descs\n");
++              return ret;
++      }
++
++      ret = dma_async_device_register(dd);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register dma device\n");
++              return ret;
++      }
++
++      ret = of_dma_controller_register(pdev->dev.of_node,
++                      of_dma_xlate_by_chan_id, hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register of dma controller\n");
++              goto err_unregister;
++      }
++
++      platform_set_drvdata(pdev, hsdma);
++
++      return 0;
++
++err_unregister:
++      dma_async_device_unregister(dd);
++      return ret;
++}
++
++static int mtk_hsdma_remove(struct platform_device *pdev)
++{
++      struct mtk_hsdam_engine *hsdma = platform_get_drvdata(pdev);
++
++      mtk_hsdma_uninit(hsdma);
++
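++      /* make sure the tasklet cannot run once the device is gone */
++      tasklet_kill(&hsdma->task);
++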
++      of_dma_controller_free(pdev->dev.of_node);
++      dma_async_device_unregister(&hsdma->ddev);
++
++      return 0;
++}
++
++static struct platform_driver mtk_hsdma_driver = {
++      .probe = mtk_hsdma_probe,
++      .remove = mtk_hsdma_remove,
++      .driver = {
++              .name = "hsdma-mt7621",
++              .of_match_table = mtk_hsdma_of_match,
++      },
++};
++module_platform_driver(mtk_hsdma_driver);
++
++MODULE_AUTHOR("Michael Lee <igvtee@gmail.com>");
++MODULE_DESCRIPTION("MTK HSDMA driver");
++MODULE_LICENSE("GPL v2");