ramips: add rtl8366s initvals to the F5D8235 V1 DTS
[oweals/openwrt.git] / target / linux / ramips / patches-4.4 / 0047-DMA-ralink-add-rt2880-dma-engine.patch
index 3362d4b5fcbe18a27a31cee9eebc9edcede78835..d100a082e7722bd24441376557c36c571db08f47 100644 (file)
@@ -14,13 +14,19 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 
 --- a/drivers/dma/Kconfig
 +++ b/drivers/dma/Kconfig
-@@ -40,6 +40,12 @@ config ASYNC_TX_ENABLE_CHANNEL_SWITCH
+@@ -40,6 +40,18 @@ config ASYNC_TX_ENABLE_CHANNEL_SWITCH
  config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
        bool
  
 +config DMA_RALINK
 +      tristate "RALINK DMA support"
-+      depends on RALINK && SOC_MT7620
++      depends on RALINK && !SOC_RT288X
++      select DMA_ENGINE
++      select DMA_VIRTUAL_CHANNELS
++
++config MTK_HSDMA
++      tristate "MTK HSDMA support"
++      depends on RALINK && SOC_MT7621
 +      select DMA_ENGINE
 +      select DMA_VIRTUAL_CHANNELS
 +
@@ -29,16 +35,17 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
  
 --- a/drivers/dma/Makefile
 +++ b/drivers/dma/Makefile
-@@ -65,5 +65,6 @@ obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-
+@@ -65,5 +65,7 @@ obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-
  obj-$(CONFIG_TI_EDMA) += edma.o
  obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
  obj-$(CONFIG_ZX_DMA) += zx296702_dma.o
 +obj-$(CONFIG_DMA_RALINK) += ralink-gdma.o
++obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
  
  obj-y += xilinx/
 --- /dev/null
 +++ b/drivers/dma/ralink-gdma.c
-@@ -0,0 +1,577 @@
+@@ -0,0 +1,928 @@
 +/*
 + *  Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de>
 + *  GDMA4740 DMAC support
@@ -48,10 +55,6 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 + *  Free Software Foundation;  either version 2 of the License, or (at your
 + *  option) any later version.
 + *
-+ *  You should have received a copy of the GNU General Public License along
-+ *  with this program; if not, write to the Free Software Foundation, Inc.,
-+ *  675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
 + */
 +
 +#include <linux/dmaengine.h>
@@ -65,11 +68,11 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#include <linux/spinlock.h>
 +#include <linux/irq.h>
 +#include <linux/of_dma.h>
++#include <linux/reset.h>
++#include <linux/of_device.h>
 +
 +#include "virt-dma.h"
 +
-+#define GDMA_NR_CHANS                 16
-+
 +#define GDMA_REG_SRC_ADDR(x)          (0x00 + (x) * 0x10)
 +#define GDMA_REG_DST_ADDR(x)          (0x04 + (x) * 0x10)
 +
@@ -84,7 +87,7 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#define GDMA_REG_CTRL0_BURST_SHIFT    3
 +#define       GDMA_REG_CTRL0_DONE_INT         BIT(2)
 +#define       GDMA_REG_CTRL0_ENABLE           BIT(1)
-+#define       GDMA_REG_CTRL0_HW_MODE          0
++#define GDMA_REG_CTRL0_SW_MODE          BIT(0)
 +
 +#define GDMA_REG_CTRL1(x)             (0x0c + (x) * 0x10)
 +#define GDMA_REG_CTRL1_SEG_MASK               0xf
@@ -109,16 +112,39 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +#define GDMA_REG_GCT_VER_SHIFT                1
 +#define GDMA_REG_GCT_ARBIT_RR         BIT(0)
 +
++#define GDMA_REG_REQSTS                       0x2a0
++#define GDMA_REG_ACKSTS                       0x2a4
++#define GDMA_REG_FINSTS                       0x2a8
++
++/* for RT305X gdma registers */
++#define GDMA_RT305X_CTRL0_REQ_MASK    0xf
++#define GDMA_RT305X_CTRL0_SRC_REQ_SHIFT       12
++#define GDMA_RT305X_CTRL0_DST_REQ_SHIFT       8
++
++#define GDMA_RT305X_CTRL1_FAIL                BIT(4)
++#define GDMA_RT305X_CTRL1_NEXT_MASK   0x7
++#define GDMA_RT305X_CTRL1_NEXT_SHIFT  1
++
++#define GDMA_RT305X_STATUS_INT                0x80
++#define GDMA_RT305X_STATUS_SIGNAL     0x84
++#define GDMA_RT305X_GCT                       0x88
++
++/* for MT7621 gdma registers */
++#define GDMA_REG_PERF_START(x)                (0x230 + (x) * 0x8)
++#define GDMA_REG_PERF_END(x)          (0x234 + (x) * 0x8)
++
 +enum gdma_dma_transfer_size {
 +      GDMA_TRANSFER_SIZE_4BYTE        = 0,
 +      GDMA_TRANSFER_SIZE_8BYTE        = 1,
 +      GDMA_TRANSFER_SIZE_16BYTE       = 2,
 +      GDMA_TRANSFER_SIZE_32BYTE       = 3,
++      GDMA_TRANSFER_SIZE_64BYTE       = 4,
 +};
 +
 +struct gdma_dma_sg {
-+      dma_addr_t addr;
-+      unsigned int len;
++      dma_addr_t src_addr;
++      dma_addr_t dst_addr;
++      u32 len;
 +};
 +
 +struct gdma_dma_desc {
@@ -127,6 +153,7 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      enum dma_transfer_direction direction;
 +      bool cyclic;
 +
++      u32 residue;
 +      unsigned int num_sgs;
 +      struct gdma_dma_sg sg[];
 +};
@@ -134,9 +161,10 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +struct gdma_dmaengine_chan {
 +      struct virt_dma_chan vchan;
 +      unsigned int id;
++      unsigned int slave_id;
 +
 +      dma_addr_t fifo_addr;
-+      unsigned int transfer_shift;
++      enum gdma_dma_transfer_size burst_size;
 +
 +      struct gdma_dma_desc *desc;
 +      unsigned int next_sg;
@@ -144,10 +172,22 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +
 +struct gdma_dma_dev {
 +      struct dma_device ddev;
++      struct device_dma_parameters dma_parms;
++      struct gdma_data *data;
 +      void __iomem *base;
-+      struct clk *clk;
++      struct tasklet_struct task;
++      volatile unsigned long chan_issued;
++      atomic_t cnt;
++
++      struct gdma_dmaengine_chan chan[];
++};
 +
-+      struct gdma_dmaengine_chan chan[GDMA_NR_CHANS];
++struct gdma_data
++{
++      int chancnt;
++      u32 done_int_reg;
++      void (*init)(struct gdma_dma_dev *dma_dev);
++      int (*start_transfer)(struct gdma_dmaengine_chan *chan);
 +};
 +
 +static struct gdma_dma_dev *gdma_dma_chan_get_dev(
@@ -176,21 +216,9 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static inline void gdma_dma_write(struct gdma_dma_dev *dma_dev,
 +      unsigned reg, uint32_t val)
 +{
-+      //printk("gdma --> %p = 0x%08X\n", dma_dev->base + reg, val);
 +      writel(val, dma_dev->base + reg);
 +}
 +
-+static inline void gdma_dma_write_mask(struct gdma_dma_dev *dma_dev,
-+      unsigned int reg, uint32_t val, uint32_t mask)
-+{
-+      uint32_t tmp;
-+
-+      tmp = gdma_dma_read(dma_dev, reg);
-+      tmp &= ~mask;
-+      tmp |= val;
-+      gdma_dma_write(dma_dev, reg, tmp);
-+}
-+
 +static struct gdma_dma_desc *gdma_dma_alloc_desc(unsigned int num_sgs)
 +{
 +      return kzalloc(sizeof(struct gdma_dma_desc) +
@@ -199,58 +227,54 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +
 +static enum gdma_dma_transfer_size gdma_dma_maxburst(u32 maxburst)
 +{
-+      if (maxburst <= 7)
++      if (maxburst < 2)
 +              return GDMA_TRANSFER_SIZE_4BYTE;
-+      else if (maxburst <= 15)
++      else if (maxburst < 4)
 +              return GDMA_TRANSFER_SIZE_8BYTE;
-+      else if (maxburst <= 31)
++      else if (maxburst < 8)
 +              return GDMA_TRANSFER_SIZE_16BYTE;
-+
-+      return GDMA_TRANSFER_SIZE_32BYTE;
++      else if (maxburst < 16)
++              return GDMA_TRANSFER_SIZE_32BYTE;
++      else
++              return GDMA_TRANSFER_SIZE_64BYTE;
 +}
 +
-+static int gdma_dma_slave_config(struct dma_chan *c,
-+      const struct dma_slave_config *config)
++static int gdma_dma_config(struct dma_chan *c,
++              struct dma_slave_config *config)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      enum gdma_dma_transfer_size transfer_size;
-+      uint32_t flags;
-+      uint32_t ctrl0, ctrl1;
++
++      if (config->device_fc) {
++              dev_err(dma_dev->ddev.dev, "not support flow controller\n");
++              return -EINVAL;
++      }
 +
 +      switch (config->direction) {
 +      case DMA_MEM_TO_DEV:
-+              ctrl1 = 32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT;
-+              ctrl1 |= config->slave_id << GDMA_REG_CTRL1_DST_REQ_SHIFT;
-+              flags = GDMA_REG_CTRL0_DST_ADDR_FIXED;
-+              transfer_size = gdma_dma_maxburst(config->dst_maxburst);
++              if (config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) {
++                      dev_err(dma_dev->ddev.dev, "only support 4 byte buswidth\n");
++                      return -EINVAL;
++              }
++              chan->slave_id = config->slave_id;
 +              chan->fifo_addr = config->dst_addr;
++              chan->burst_size = gdma_dma_maxburst(config->dst_maxburst);
 +              break;
-+
 +      case DMA_DEV_TO_MEM:
-+              ctrl1 = config->slave_id << GDMA_REG_CTRL1_SRC_REQ_SHIFT;
-+              ctrl1 |= 32 << GDMA_REG_CTRL1_DST_REQ_SHIFT;
-+              flags = GDMA_REG_CTRL0_SRC_ADDR_FIXED;
-+              transfer_size = gdma_dma_maxburst(config->src_maxburst);
++              if (config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) {
++                      dev_err(dma_dev->ddev.dev, "only support 4 byte buswidth\n");
++                      return -EINVAL;
++              }
++              chan->slave_id = config->slave_id;
 +              chan->fifo_addr = config->src_addr;
++              chan->burst_size = gdma_dma_maxburst(config->src_maxburst);
 +              break;
-+
 +      default:
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              config->direction);
 +              return -EINVAL;
 +      }
 +
-+      chan->transfer_shift = 1 + transfer_size;
-+
-+      ctrl0 = flags | GDMA_REG_CTRL0_HW_MODE;
-+      ctrl0 |= GDMA_REG_CTRL0_DONE_INT;
-+
-+      ctrl1 &= ~(GDMA_REG_CTRL1_NEXT_MASK << GDMA_REG_CTRL1_NEXT_SHIFT);
-+      ctrl1 |= chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
-+      ctrl1 |= GDMA_REG_CTRL1_FAIL;
-+      ctrl1 &= ~GDMA_REG_CTRL1_CONTINOUS;
-+      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
-+      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
-+
 +      return 0;
 +}
 +
@@ -258,108 +282,271 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      unsigned long flags;
++      unsigned long flags, timeout;
 +      LIST_HEAD(head);
++      int i = 0;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id), 0,
-+                      GDMA_REG_CTRL0_ENABLE);
 +      chan->desc = NULL;
++      clear_bit(chan->id, &dma_dev->chan_issued);
 +      vchan_get_all_descriptors(&chan->vchan, &head);
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +
 +      vchan_dma_desc_free_list(&chan->vchan, &head);
 +
++      /* wait for the dma transfer to complete */
++      timeout = jiffies + msecs_to_jiffies(5000);
++      while (gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id)) &
++                      GDMA_REG_CTRL0_ENABLE) {
++              if (time_after_eq(jiffies, timeout)) {
++                      dev_err(dma_dev->ddev.dev, "chan %d wait timeout\n",
++                                      chan->id);
++                      /* restore to init value */
++                      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), 0);
++                      break;
++              }
++              cpu_relax();
++              i++;
++      }
++
++      if (i)
++              dev_dbg(dma_dev->ddev.dev, "terminate chan %d loops %d\n",
++                              chan->id, i);
++
 +      return 0;
 +}
 +
-+static int gdma_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
-+      unsigned long arg)
++static void rt305x_dump_reg(struct gdma_dma_dev *dma_dev, int id)
 +{
-+      struct dma_slave_config *config = (struct dma_slave_config *)arg;
-+
-+      switch (cmd) {
-+      case DMA_SLAVE_CONFIG:
-+              return gdma_dma_slave_config(chan, config);
-+      case DMA_TERMINATE_ALL:
-+              return gdma_dma_terminate_all(chan);
-+      default:
-+              return -ENOSYS;
-+      }
++      dev_dbg(dma_dev->ddev.dev, "chan %d, src %08x, dst %08x, ctr0 %08x, " \
++                      "ctr1 %08x, intr %08x, signal %08x\n", id,
++                      gdma_dma_read(dma_dev, GDMA_REG_SRC_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_DST_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL0(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL1(id)),
++                      gdma_dma_read(dma_dev, GDMA_RT305X_STATUS_INT),
++                      gdma_dma_read(dma_dev, GDMA_RT305X_STATUS_SIGNAL));
 +}
 +
-+static int gdma_dma_start_transfer(struct gdma_dmaengine_chan *chan)
++static int rt305x_gdma_start_transfer(struct gdma_dmaengine_chan *chan)
 +{
 +      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
 +      dma_addr_t src_addr, dst_addr;
-+      struct virt_dma_desc *vdesc;
 +      struct gdma_dma_sg *sg;
++      uint32_t ctrl0, ctrl1;
 +
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id), 0,
-+                      GDMA_REG_CTRL0_ENABLE);
++      /* verify chan is already stopped */
++      ctrl0 = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
++      if (unlikely(ctrl0 & GDMA_REG_CTRL0_ENABLE)) {
++              dev_err(dma_dev->ddev.dev, "chan %d is start(%08x).\n",
++                              chan->id, ctrl0);
++              rt305x_dump_reg(dma_dev, chan->id);
++              return -EINVAL;
++      }
 +
-+      if (!chan->desc) {
-+              vdesc = vchan_next_desc(&chan->vchan);
-+              if (!vdesc)
-+                      return 0;
-+              chan->desc = to_gdma_dma_desc(vdesc);
-+              chan->next_sg = 0;
++      sg = &chan->desc->sg[chan->next_sg];
++      if (chan->desc->direction == DMA_MEM_TO_DEV) {
++              src_addr = sg->src_addr;
++              dst_addr = chan->fifo_addr;
++              ctrl0 = GDMA_REG_CTRL0_DST_ADDR_FIXED | \
++                      (8 << GDMA_RT305X_CTRL0_SRC_REQ_SHIFT) | \
++                      (chan->slave_id << GDMA_RT305X_CTRL0_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_DEV_TO_MEM) {
++              src_addr = chan->fifo_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SRC_ADDR_FIXED | \
++                      (chan->slave_id << GDMA_RT305X_CTRL0_SRC_REQ_SHIFT) | \
++                      (8 << GDMA_RT305X_CTRL0_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_MEM_TO_MEM) {
++              /*
++               * TODO: the memcpy path has bugs. Sometimes it copies
++               * 8 extra bytes of data when verified with dmatest.
++               */
++              src_addr = sg->src_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SW_MODE | \
++                      (8 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (8 << GDMA_REG_CTRL1_DST_REQ_SHIFT);
++      } else {
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              chan->desc->direction);
++              return -EINVAL;
 +      }
 +
-+      if (chan->next_sg == chan->desc->num_sgs)
-+              chan->next_sg = 0;
++      ctrl0 |= (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | \
++               (chan->burst_size << GDMA_REG_CTRL0_BURST_SHIFT) | \
++               GDMA_REG_CTRL0_DONE_INT | GDMA_REG_CTRL0_ENABLE;
++      ctrl1 = chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
 +
-+      sg = &chan->desc->sg[chan->next_sg];
++      chan->next_sg++;
++      gdma_dma_write(dma_dev, GDMA_REG_SRC_ADDR(chan->id), src_addr);
++      gdma_dma_write(dma_dev, GDMA_REG_DST_ADDR(chan->id), dst_addr);
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
++
++      /* make sure next_sg is updated */
++      wmb();
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
 +
++      return 0;
++}
++
++static void rt3883_dump_reg(struct gdma_dma_dev *dma_dev, int id)
++{
++      dev_dbg(dma_dev->ddev.dev, "chan %d, src %08x, dst %08x, ctr0 %08x, " \
++                      "ctr1 %08x, unmask %08x, done %08x, " \
++                      "req %08x, ack %08x, fin %08x\n", id,
++                      gdma_dma_read(dma_dev, GDMA_REG_SRC_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_DST_ADDR(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL0(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_CTRL1(id)),
++                      gdma_dma_read(dma_dev, GDMA_REG_UNMASK_INT),
++                      gdma_dma_read(dma_dev, GDMA_REG_DONE_INT),
++                      gdma_dma_read(dma_dev, GDMA_REG_REQSTS),
++                      gdma_dma_read(dma_dev, GDMA_REG_ACKSTS),
++                      gdma_dma_read(dma_dev, GDMA_REG_FINSTS));
++}
++
++static int rt3883_gdma_start_transfer(struct gdma_dmaengine_chan *chan)
++{
++      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
++      dma_addr_t src_addr, dst_addr;
++      struct gdma_dma_sg *sg;
++      uint32_t ctrl0, ctrl1;
++
++      /* verify chan is already stopped */
++      ctrl0 = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
++      if (unlikely(ctrl0 & GDMA_REG_CTRL0_ENABLE)) {
++              dev_err(dma_dev->ddev.dev, "chan %d is start(%08x).\n",
++                              chan->id, ctrl0);
++              rt3883_dump_reg(dma_dev, chan->id);
++              return -EINVAL;
++      }
++
++      sg = &chan->desc->sg[chan->next_sg];
 +      if (chan->desc->direction == DMA_MEM_TO_DEV) {
-+              src_addr = sg->addr;
++              src_addr = sg->src_addr;
 +              dst_addr = chan->fifo_addr;
-+      } else {
++              ctrl0 = GDMA_REG_CTRL0_DST_ADDR_FIXED;
++              ctrl1 = (32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (chan->slave_id << GDMA_REG_CTRL1_DST_REQ_SHIFT);
++      } else if (chan->desc->direction == DMA_DEV_TO_MEM) {
 +              src_addr = chan->fifo_addr;
-+              dst_addr = sg->addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SRC_ADDR_FIXED;
++              ctrl1 = (chan->slave_id << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (32 << GDMA_REG_CTRL1_DST_REQ_SHIFT) | \
++                      GDMA_REG_CTRL1_COHERENT;
++      } else if (chan->desc->direction == DMA_MEM_TO_MEM) {
++              src_addr = sg->src_addr;
++              dst_addr = sg->dst_addr;
++              ctrl0 = GDMA_REG_CTRL0_SW_MODE;
++              ctrl1 = (32 << GDMA_REG_CTRL1_SRC_REQ_SHIFT) | \
++                      (32 << GDMA_REG_CTRL1_DST_REQ_SHIFT) | \
++                      GDMA_REG_CTRL1_COHERENT;
++      } else {
++              dev_err(dma_dev->ddev.dev, "direction type %d error\n",
++                              chan->desc->direction);
++              return -EINVAL;
 +      }
++
++      ctrl0 |= (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | \
++               (chan->burst_size << GDMA_REG_CTRL0_BURST_SHIFT) | \
++               GDMA_REG_CTRL0_DONE_INT | GDMA_REG_CTRL0_ENABLE;
++      ctrl1 |= chan->id << GDMA_REG_CTRL1_NEXT_SHIFT;
++
++      chan->next_sg++;
 +      gdma_dma_write(dma_dev, GDMA_REG_SRC_ADDR(chan->id), src_addr);
 +      gdma_dma_write(dma_dev, GDMA_REG_DST_ADDR(chan->id), dst_addr);
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL0(chan->id),
-+                      (sg->len << GDMA_REG_CTRL0_TX_SHIFT) | GDMA_REG_CTRL0_ENABLE,
-+                      GDMA_REG_CTRL0_TX_MASK << GDMA_REG_CTRL0_TX_SHIFT);
-+      chan->next_sg++;
-+      gdma_dma_write_mask(dma_dev, GDMA_REG_CTRL1(chan->id), 0, GDMA_REG_CTRL1_MASK);
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL1(chan->id), ctrl1);
++
++      /* make sure next_sg is updated */
++      wmb();
++      gdma_dma_write(dma_dev, GDMA_REG_CTRL0(chan->id), ctrl0);
 +
 +      return 0;
 +}
 +
-+static void gdma_dma_chan_irq(struct gdma_dmaengine_chan *chan)
++static inline int gdma_start_transfer(struct gdma_dma_dev *dma_dev,
++              struct gdma_dmaengine_chan *chan)
++{
++      return dma_dev->data->start_transfer(chan);
++}
++
++static int gdma_next_desc(struct gdma_dmaengine_chan *chan)
++{
++      struct virt_dma_desc *vdesc;
++
++      vdesc = vchan_next_desc(&chan->vchan);
++      if (!vdesc) {
++              chan->desc = NULL;
++              return 0;
++      }
++      chan->desc = to_gdma_dma_desc(vdesc);
++      chan->next_sg = 0;
++
++      return 1;
++}
++
++static void gdma_dma_chan_irq(struct gdma_dma_dev *dma_dev,
++              struct gdma_dmaengine_chan *chan)
 +{
-+      spin_lock(&chan->vchan.lock);
-+      if (chan->desc) {
-+              if (chan->desc && chan->desc->cyclic) {
-+                      vchan_cyclic_callback(&chan->desc->vdesc);
++      struct gdma_dma_desc *desc;
++      unsigned long flags;
++      int chan_issued;
++
++      chan_issued = 0;
++      spin_lock_irqsave(&chan->vchan.lock, flags);
++      desc = chan->desc;
++      if (desc) {
++              if (desc->cyclic) {
++                      vchan_cyclic_callback(&desc->vdesc);
++                      if (chan->next_sg == desc->num_sgs)
++                              chan->next_sg = 0;
++                      chan_issued = 1;
 +              } else {
-+                      if (chan->next_sg == chan->desc->num_sgs) {
-+                              chan->desc = NULL;
-+                              vchan_cookie_complete(&chan->desc->vdesc);
-+                      }
++                      desc->residue -= desc->sg[chan->next_sg - 1].len;
++                      if (chan->next_sg == desc->num_sgs) {
++                              list_del(&desc->vdesc.node);
++                              vchan_cookie_complete(&desc->vdesc);
++                              chan_issued = gdma_next_desc(chan);
++                      } else
++                              chan_issued = 1;
 +              }
-+      }
-+      gdma_dma_start_transfer(chan);
-+      spin_unlock(&chan->vchan.lock);
++      } else
++              dev_dbg(dma_dev->ddev.dev, "chan %d no desc to complete\n",
++                              chan->id);
++      if (chan_issued)
++              set_bit(chan->id, &dma_dev->chan_issued);
++      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +}
 +
 +static irqreturn_t gdma_dma_irq(int irq, void *devid)
 +{
 +      struct gdma_dma_dev *dma_dev = devid;
-+      uint32_t unmask, done;
++      u32 done, done_reg;
 +      unsigned int i;
 +
-+      unmask = gdma_dma_read(dma_dev, GDMA_REG_UNMASK_INT);
-+      gdma_dma_write(dma_dev, GDMA_REG_UNMASK_INT, unmask);
-+      done = gdma_dma_read(dma_dev, GDMA_REG_DONE_INT);
++      done_reg = dma_dev->data->done_int_reg;
++      done = gdma_dma_read(dma_dev, done_reg);
++      if (unlikely(!done))
++              return IRQ_NONE;
++
++      /* clear the done bits */
++      gdma_dma_write(dma_dev, done_reg, done);
 +
-+      for (i = 0; i < GDMA_NR_CHANS; ++i)
-+              if (done & BIT(i))
-+                      gdma_dma_chan_irq(&dma_dev->chan[i]);
-+      gdma_dma_write(dma_dev, GDMA_REG_DONE_INT, done);
++      i = 0;
++      while (done) {
++              if (done & 0x1) {
++                      gdma_dma_chan_irq(dma_dev, &dma_dev->chan[i]);
++                      atomic_dec(&dma_dev->cnt);
++              }
++              done >>= 1;
++              i++;
++      }
++
++      /* schedule the tasklet only if there is work to do */
++      if (dma_dev->chan_issued)
++              tasklet_schedule(&dma_dev->task);
 +
 +      return IRQ_HANDLED;
 +}
@@ -367,18 +554,25 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static void gdma_dma_issue_pending(struct dma_chan *c)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
++      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      if (vchan_issue_pending(&chan->vchan) && !chan->desc)
-+              gdma_dma_start_transfer(chan);
++      if (vchan_issue_pending(&chan->vchan) && !chan->desc) {
++              if (gdma_next_desc(chan)) {
++                      set_bit(chan->id, &dma_dev->chan_issued);
++                      tasklet_schedule(&dma_dev->task);
++              } else
++                      dev_dbg(dma_dev->ddev.dev, "chan %d no desc to issue\n",
++                                      chan->id);
++      }
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +}
 +
 +static struct dma_async_tx_descriptor *gdma_dma_prep_slave_sg(
-+      struct dma_chan *c, struct scatterlist *sgl,
-+      unsigned int sg_len, enum dma_transfer_direction direction,
-+      unsigned long flags, void *context)
++              struct dma_chan *c, struct scatterlist *sgl,
++              unsigned int sg_len, enum dma_transfer_direction direction,
++              unsigned long flags, void *context)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_desc *desc;
@@ -386,12 +580,30 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      unsigned int i;
 +
 +      desc = gdma_dma_alloc_desc(sg_len);
-+      if (!desc)
++      if (!desc) {
++              dev_err(c->device->dev, "alloc sg decs error\n");
 +              return NULL;
++      }
++      desc->residue = 0;
 +
 +      for_each_sg(sgl, sg, sg_len, i) {
-+              desc->sg[i].addr = sg_dma_address(sg);
++              if (direction == DMA_MEM_TO_DEV)
++                      desc->sg[i].src_addr = sg_dma_address(sg);
++              else if (direction == DMA_DEV_TO_MEM)
++                      desc->sg[i].dst_addr = sg_dma_address(sg);
++              else {
++                      dev_err(c->device->dev, "direction type %d error\n",
++                                      direction);
++                      goto free_desc;
++              }
++
++              if (unlikely(sg_dma_len(sg) > GDMA_REG_CTRL0_TX_MASK)) {
++                      dev_err(c->device->dev, "sg len too large %d\n",
++                                      sg_dma_len(sg));
++                      goto free_desc;
++              }
 +              desc->sg[i].len = sg_dma_len(sg);
++              desc->residue += sg_dma_len(sg);
 +      }
 +
 +      desc->num_sgs = sg_len;
@@ -399,12 +611,60 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      desc->cyclic = false;
 +
 +      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
++
++free_desc:
++      kfree(desc);
++      return NULL;
++}
++
++static struct dma_async_tx_descriptor * gdma_dma_prep_dma_memcpy(
++              struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
++              size_t len, unsigned long flags)
++{
++      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
++      struct gdma_dma_desc *desc;
++      unsigned int num_periods, i;
++      size_t xfer_count;
++
++      if (len <= 0)
++              return NULL;
++
++      chan->burst_size = gdma_dma_maxburst(len >> 2);
++
++      xfer_count = GDMA_REG_CTRL0_TX_MASK;
++      num_periods = DIV_ROUND_UP(len, xfer_count);
++
++      desc = gdma_dma_alloc_desc(num_periods);
++      if (!desc) {
++              dev_err(c->device->dev, "alloc memcpy decs error\n");
++              return NULL;
++      }
++      desc->residue = len;
++
++      for (i = 0; i < num_periods; i++) {
++              desc->sg[i].src_addr = src;
++              desc->sg[i].dst_addr = dest;
++              if (len > xfer_count) {
++                      desc->sg[i].len = xfer_count;
++              } else {
++                      desc->sg[i].len = len;
++              }
++              src += desc->sg[i].len;
++              dest += desc->sg[i].len;
++              len -= desc->sg[i].len;
++      }
++
++      desc->num_sgs = num_periods;
++      desc->direction = DMA_MEM_TO_MEM;
++      desc->cyclic = false;
++
++      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
 +}
 +
 +static struct dma_async_tx_descriptor *gdma_dma_prep_dma_cyclic(
 +      struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
 +      size_t period_len, enum dma_transfer_direction direction,
-+      unsigned long flags, void *context)
++      unsigned long flags)
 +{
 +      struct gdma_dmaengine_chan *chan = to_gdma_dma_chan(c);
 +      struct gdma_dma_desc *desc;
@@ -413,14 +673,30 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      if (buf_len % period_len)
 +              return NULL;
 +
-+      num_periods = buf_len / period_len;
++      if (period_len > GDMA_REG_CTRL0_TX_MASK) {
++              dev_err(c->device->dev, "cyclic len too large %d\n",
++                              period_len);
++              return NULL;
++      }
 +
++      num_periods = buf_len / period_len;
 +      desc = gdma_dma_alloc_desc(num_periods);
-+      if (!desc)
++      if (!desc) {
++              dev_err(c->device->dev, "alloc cyclic decs error\n");
 +              return NULL;
++      }
++      desc->residue = buf_len;
 +
 +      for (i = 0; i < num_periods; i++) {
-+              desc->sg[i].addr = buf_addr;
++              if (direction == DMA_MEM_TO_DEV)
++                      desc->sg[i].src_addr = buf_addr;
++              else if (direction == DMA_DEV_TO_MEM)
++                      desc->sg[i].dst_addr = buf_addr;
++              else {
++                      dev_err(c->device->dev, "direction type %d error\n",
++                                      direction);
++                      goto free_desc;
++              }
 +              desc->sg[i].len = period_len;
 +              buf_addr += period_len;
 +      }
@@ -430,28 +706,10 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      desc->cyclic = true;
 +
 +      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
-+}
 +
-+static size_t gdma_dma_desc_residue(struct gdma_dmaengine_chan *chan,
-+      struct gdma_dma_desc *desc, unsigned int next_sg)
-+{
-+      struct gdma_dma_dev *dma_dev = gdma_dma_chan_get_dev(chan);
-+      unsigned int residue, count;
-+      unsigned int i;
-+
-+      residue = 0;
-+
-+      for (i = next_sg; i < desc->num_sgs; i++)
-+              residue += desc->sg[i].len;
-+
-+      if (next_sg != 0) {
-+              count = gdma_dma_read(dma_dev, GDMA_REG_CTRL0(chan->id));
-+              count >>= GDMA_REG_CTRL0_CURR_SHIFT;
-+              count &= GDMA_REG_CTRL0_CURR_MASK;
-+              residue += count << chan->transfer_shift;
-+      }
-+
-+      return residue;
++free_desc:
++      kfree(desc);
++      return NULL;
 +}
 +
 +static enum dma_status gdma_dma_tx_status(struct dma_chan *c,
@@ -461,30 +719,32 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      struct virt_dma_desc *vdesc;
 +      enum dma_status status;
 +      unsigned long flags;
++      struct gdma_dma_desc *desc;
 +
 +      status = dma_cookie_status(c, cookie, state);
-+      if (status == DMA_SUCCESS || !state)
++      if (status == DMA_COMPLETE || !state)
 +              return status;
 +
 +      spin_lock_irqsave(&chan->vchan.lock, flags);
-+      vdesc = vchan_find_desc(&chan->vchan, cookie);
-+      if (cookie == chan->desc->vdesc.tx.cookie) {
-+              state->residue = gdma_dma_desc_residue(chan, chan->desc,
-+                              chan->next_sg);
-+      } else if (vdesc) {
-+              state->residue = gdma_dma_desc_residue(chan,
-+                              to_gdma_dma_desc(vdesc), 0);
-+      } else {
-+              state->residue = 0;
-+      }
++      desc = chan->desc;
++      if (desc && (cookie == desc->vdesc.tx.cookie)) {
++              /*
++               * We never update desc->residue in the cyclic case, so we
++               * can tell the remaining room to the end of the circular
++               * buffer.
++               */
++              if (desc->cyclic)
++                      state->residue = desc->residue -
++                              ((chan->next_sg - 1) * desc->sg[0].len);
++              else
++                      state->residue = desc->residue;
++      } else if ((vdesc = vchan_find_desc(&chan->vchan, cookie)))
++              state->residue = to_gdma_dma_desc(vdesc)->residue;
 +      spin_unlock_irqrestore(&chan->vchan.lock, flags);
 +
-+      return status;
-+}
++      dev_dbg(c->device->dev, "tx residue %d bytes\n", state->residue);
 +
-+static int gdma_dma_alloc_chan_resources(struct dma_chan *c)
-+{
-+      return 0;
++      return status;
 +}
 +
 +static void gdma_dma_free_chan_resources(struct dma_chan *c)
@@ -497,87 +757,192 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +      kfree(container_of(vdesc, struct gdma_dma_desc, vdesc));
 +}
 +
-+static struct dma_chan *
-+of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
-+                      struct of_dma *ofdma)
++static void gdma_dma_tasklet(unsigned long arg)
 +{
-+      struct gdma_dma_dev *dma_dev = ofdma->of_dma_data;
-+      unsigned int request = dma_spec->args[0];
++      struct gdma_dma_dev *dma_dev = (struct gdma_dma_dev *)arg;
++      struct gdma_dmaengine_chan *chan;
++      static unsigned int last_chan;
++      unsigned int i, chan_mask;
++
++      /* record last chan to round robin all chans */
++      i = last_chan;
++      chan_mask = dma_dev->data->chancnt - 1;
++      do {
++              /*
++               * On MT7621, testing with dmatest with all channels
++               * enabled shows that only two channels may be
++               * working at the same time; otherwise the data
++               * comes out wrong.
++               */
++              if (atomic_read(&dma_dev->cnt) >= 2) {
++                      last_chan = i;
++                      break;
++              }
 +
-+      if (request >= GDMA_NR_CHANS)
-+              return NULL;
++              if (test_and_clear_bit(i, &dma_dev->chan_issued)) {
++                      chan = &dma_dev->chan[i];
++                      if (chan->desc) {
++                              atomic_inc(&dma_dev->cnt);
++                              gdma_start_transfer(dma_dev, chan);
++                      } else
++                              dev_dbg(dma_dev->ddev.dev, "chan %d no desc to issue\n", chan->id);
++
++                      if (!dma_dev->chan_issued)
++                              break;
++              }
++
++              i = (i + 1) & chan_mask;
++      } while (i != last_chan);
++}
++
++static void rt305x_gdma_init(struct gdma_dma_dev *dma_dev)
++{
++      uint32_t gct;
++
++      /* all chans round robin */
++      gdma_dma_write(dma_dev, GDMA_RT305X_GCT, GDMA_REG_GCT_ARBIT_RR);
 +
-+      return dma_get_slave_channel(&(dma_dev->chan[request].vchan.chan));
++      gct = gdma_dma_read(dma_dev, GDMA_RT305X_GCT);
++      dev_info(dma_dev->ddev.dev, "revision: %d, channels: %d\n",
++                      (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
++                      8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) &
++                              GDMA_REG_GCT_CHAN_MASK));
 +}
 +
++static void rt3883_gdma_init(struct gdma_dma_dev *dma_dev)
++{
++      uint32_t gct;
++
++      /* all chans round robin */
++      gdma_dma_write(dma_dev, GDMA_REG_GCT, GDMA_REG_GCT_ARBIT_RR);
++
++      gct = gdma_dma_read(dma_dev, GDMA_REG_GCT);
++      dev_info(dma_dev->ddev.dev, "revision: %d, channels: %d\n",
++                      (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
++                      8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) &
++                              GDMA_REG_GCT_CHAN_MASK));
++}
++
++static struct gdma_data rt305x_gdma_data = {
++      .chancnt = 8,
++      .done_int_reg = GDMA_RT305X_STATUS_INT,
++      .init = rt305x_gdma_init,
++      .start_transfer = rt305x_gdma_start_transfer,
++};
++
++static struct gdma_data rt3883_gdma_data = {
++      .chancnt = 16,
++      .done_int_reg = GDMA_REG_DONE_INT,
++      .init = rt3883_gdma_init,
++      .start_transfer = rt3883_gdma_start_transfer,
++};
++
++static const struct of_device_id gdma_of_match_table[] = {
++      { .compatible = "ralink,rt305x-gdma", .data = &rt305x_gdma_data },
++      { .compatible = "ralink,rt3883-gdma", .data = &rt3883_gdma_data },
++      { },
++};
++
 +static int gdma_dma_probe(struct platform_device *pdev)
 +{
++      const struct of_device_id *match;
 +      struct gdma_dmaengine_chan *chan;
 +      struct gdma_dma_dev *dma_dev;
 +      struct dma_device *dd;
 +      unsigned int i;
 +      struct resource *res;
-+      uint32_t gct;
 +      int ret;
 +      int irq;
++      void __iomem *base;
++      struct gdma_data *data;
 +
++      ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
++      if (ret)
++              return ret;
 +
-+      dma_dev = devm_kzalloc(&pdev->dev, sizeof(*dma_dev), GFP_KERNEL);
-+      if (!dma_dev)
++      match = of_match_device(gdma_of_match_table, &pdev->dev);
++      if (!match)
 +              return -EINVAL;
++      data = (struct gdma_data *) match->data;
 +
-+      dd = &dma_dev->ddev;
++      dma_dev = devm_kzalloc(&pdev->dev, sizeof(*dma_dev) +
++                      (sizeof(struct gdma_dmaengine_chan) * data->chancnt),
++                      GFP_KERNEL);
++      if (!dma_dev) {
++              dev_err(&pdev->dev, "alloc dma device failed\n");
++              return -EINVAL;
++      }
++      dma_dev->data = data;
 +
 +      res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-+      dma_dev->base = devm_ioremap_resource(&pdev->dev, res);
-+      if (IS_ERR(dma_dev->base))
-+              return PTR_ERR(dma_dev->base);
++      base = devm_ioremap_resource(&pdev->dev, res);
++      if (IS_ERR(base))
++              return PTR_ERR(base);
++      dma_dev->base = base;
++      tasklet_init(&dma_dev->task, gdma_dma_tasklet, (unsigned long)dma_dev);
++
++      irq = platform_get_irq(pdev, 0);
++      if (irq < 0) {
++              dev_err(&pdev->dev, "failed to get irq\n");
++              return -EINVAL;
++      }
++      ret = devm_request_irq(&pdev->dev, irq, gdma_dma_irq,
++                      0, dev_name(&pdev->dev), dma_dev);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to request irq\n");
++              return ret;
++      }
++
++      device_reset(&pdev->dev);
 +
++      dd = &dma_dev->ddev;
++      dma_cap_set(DMA_MEMCPY, dd->cap_mask);
 +      dma_cap_set(DMA_SLAVE, dd->cap_mask);
 +      dma_cap_set(DMA_CYCLIC, dd->cap_mask);
-+      dd->device_alloc_chan_resources = gdma_dma_alloc_chan_resources;
 +      dd->device_free_chan_resources = gdma_dma_free_chan_resources;
-+      dd->device_tx_status = gdma_dma_tx_status;
-+      dd->device_issue_pending = gdma_dma_issue_pending;
++      dd->device_prep_dma_memcpy = gdma_dma_prep_dma_memcpy;
 +      dd->device_prep_slave_sg = gdma_dma_prep_slave_sg;
 +      dd->device_prep_dma_cyclic = gdma_dma_prep_dma_cyclic;
-+      dd->device_control = gdma_dma_control;
++      dd->device_config = gdma_dma_config;
++      dd->device_terminate_all = gdma_dma_terminate_all;
++      dd->device_tx_status = gdma_dma_tx_status;
++      dd->device_issue_pending = gdma_dma_issue_pending;
++
++      dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
++      dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
++      dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
++      dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
++
 +      dd->dev = &pdev->dev;
-+      dd->chancnt = GDMA_NR_CHANS;
++      dd->dev->dma_parms = &dma_dev->dma_parms;
++      dma_set_max_seg_size(dd->dev, GDMA_REG_CTRL0_TX_MASK);
 +      INIT_LIST_HEAD(&dd->channels);
 +
-+      for (i = 0; i < dd->chancnt; i++) {
++      for (i = 0; i < data->chancnt; i++) {
 +              chan = &dma_dev->chan[i];
 +              chan->id = i;
 +              chan->vchan.desc_free = gdma_dma_desc_free;
 +              vchan_init(&chan->vchan, dd);
 +      }
 +
++      /* init hardware */
++      data->init(dma_dev);
++
 +      ret = dma_async_device_register(dd);
-+      if (ret)
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register dma device\n");
 +              return ret;
++      }
 +
 +      ret = of_dma_controller_register(pdev->dev.of_node,
 +              of_dma_xlate_by_chan_id, dma_dev);
-+      if (ret)
-+              goto err_unregister;
-+
-+      irq = platform_get_irq(pdev, 0);
-+      ret = request_irq(irq, gdma_dma_irq, 0, dev_name(&pdev->dev), dma_dev);
-+      if (ret)
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register of dma controller\n");
 +              goto err_unregister;
++      }
 +
-+      gdma_dma_write(dma_dev, GDMA_REG_UNMASK_INT, 0);
-+      gdma_dma_write(dma_dev, GDMA_REG_DONE_INT, BIT(dd->chancnt) - 1);
-+
-+      gct = gdma_dma_read(dma_dev, GDMA_REG_GCT);
-+      dev_info(&pdev->dev, "revision: %d, channels: %d\n",
-+              (gct >> GDMA_REG_GCT_VER_SHIFT) & GDMA_REG_GCT_VER_MASK,
-+              8 << ((gct >> GDMA_REG_GCT_CHAN_SHIFT) & GDMA_REG_GCT_CHAN_MASK));
 +      platform_set_drvdata(pdev, dma_dev);
 +
-+      gdma_dma_write(dma_dev, GDMA_REG_GCT, GDMA_REG_GCT_ARBIT_RR);
-+
 +      return 0;
 +
 +err_unregister:
@@ -588,34 +953,27 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
 +static int gdma_dma_remove(struct platform_device *pdev)
 +{
 +      struct gdma_dma_dev *dma_dev = platform_get_drvdata(pdev);
-+      int irq = platform_get_irq(pdev, 0);
 +
-+      free_irq(irq, dma_dev);
++      tasklet_kill(&dma_dev->task);
 +        of_dma_controller_free(pdev->dev.of_node);
 +      dma_async_device_unregister(&dma_dev->ddev);
 +
 +      return 0;
 +}
 +
-+static const struct of_device_id gdma_of_match_table[] = {
-+      { .compatible = "ralink,rt2880-gdma" },
-+      { },
-+};
-+
 +static struct platform_driver gdma_dma_driver = {
 +      .probe = gdma_dma_probe,
 +      .remove = gdma_dma_remove,
 +      .driver = {
 +              .name = "gdma-rt2880",
-+              .owner = THIS_MODULE,
 +              .of_match_table = gdma_of_match_table,
 +      },
 +};
 +module_platform_driver(gdma_dma_driver);
 +
 +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
-+MODULE_DESCRIPTION("GDMA4740 DMA driver");
-+MODULE_LICENSE("GPLv2");
++MODULE_DESCRIPTION("Ralink/MTK DMA driver");
++MODULE_LICENSE("GPL v2");
 --- a/include/linux/dmaengine.h
 +++ b/include/linux/dmaengine.h
 @@ -496,6 +496,7 @@ static inline void dma_set_unmap(struct
@@ -626,3 +984,773 @@ Signed-off-by: John Crispin <blogic@openwrt.org>
  #else
  static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
                                 struct dmaengine_unmap_data *unmap)
+--- /dev/null
++++ b/drivers/dma/mtk-hsdma.c
+@@ -0,0 +1,767 @@
++/*
++ *  Copyright (C) 2015, Michael Lee <igvtee@gmail.com>
++ *  MTK HSDMA support
++ *
++ *  This program is free software; you can redistribute it and/or modify it
++ *  under  the terms of the GNU General        Public License as published by the
++ *  Free Software Foundation;  either version 2 of the License, or (at your
++ *  option) any later version.
++ *
++ */
++
++#include <linux/dmaengine.h>
++#include <linux/dma-mapping.h>
++#include <linux/err.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/platform_device.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/irq.h>
++#include <linux/of_dma.h>
++#include <linux/reset.h>
++#include <linux/of_device.h>
++
++#include "virt-dma.h"
++
++#define HSDMA_BASE_OFFSET             0x800
++
++#define HSDMA_REG_TX_BASE             0x00
++#define HSDMA_REG_TX_CNT              0x04
++#define HSDMA_REG_TX_CTX              0x08
++#define HSDMA_REG_TX_DTX              0x0c
++#define HSDMA_REG_RX_BASE             0x100
++#define HSDMA_REG_RX_CNT              0x104
++#define HSDMA_REG_RX_CRX              0x108
++#define HSDMA_REG_RX_DRX              0x10c
++#define HSDMA_REG_INFO                        0x200
++#define HSDMA_REG_GLO_CFG             0x204
++#define HSDMA_REG_RST_CFG             0x208
++#define HSDMA_REG_DELAY_INT           0x20c
++#define HSDMA_REG_FREEQ_THRES         0x210
++#define HSDMA_REG_INT_STATUS          0x220
++#define HSDMA_REG_INT_MASK            0x228
++#define HSDMA_REG_SCH_Q01             0x280
++#define HSDMA_REG_SCH_Q23             0x284
++
++#define HSDMA_DESCS_MAX                       0xfff
++#define HSDMA_DESCS_NUM                       8
++#define HSDMA_DESCS_MASK              (HSDMA_DESCS_NUM - 1)
++#define HSDMA_NEXT_DESC(x)            (((x) + 1) & HSDMA_DESCS_MASK)
++
++/* HSDMA_REG_INFO */
++#define HSDMA_INFO_INDEX_MASK         0xf
++#define HSDMA_INFO_INDEX_SHIFT                24
++#define HSDMA_INFO_BASE_MASK          0xff
++#define HSDMA_INFO_BASE_SHIFT         16
++#define HSDMA_INFO_RX_MASK            0xff
++#define HSDMA_INFO_RX_SHIFT           8
++#define HSDMA_INFO_TX_MASK            0xff
++#define HSDMA_INFO_TX_SHIFT           0
++
++/* HSDMA_REG_GLO_CFG */
++#define HSDMA_GLO_TX_2B_OFFSET                BIT(31)
++#define HSDMA_GLO_CLK_GATE            BIT(30)
++#define HSDMA_GLO_BYTE_SWAP           BIT(29)
++#define HSDMA_GLO_MULTI_DMA           BIT(10)
++#define HSDMA_GLO_TWO_BUF             BIT(9)
++#define HSDMA_GLO_32B_DESC            BIT(8)
++#define HSDMA_GLO_BIG_ENDIAN          BIT(7)
++#define HSDMA_GLO_TX_DONE             BIT(6)
++#define HSDMA_GLO_BT_MASK             0x3
++#define HSDMA_GLO_BT_SHIFT            4
++#define HSDMA_GLO_RX_BUSY             BIT(3)
++#define HSDMA_GLO_RX_DMA              BIT(2)
++#define HSDMA_GLO_TX_BUSY             BIT(1)
++#define HSDMA_GLO_TX_DMA              BIT(0)
++
++#define HSDMA_BT_SIZE_16BYTES         (0 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_32BYTES         (1 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_64BYTES         (2 << HSDMA_GLO_BT_SHIFT)
++#define HSDMA_BT_SIZE_128BYTES                (3 << HSDMA_GLO_BT_SHIFT)
++
++#define HSDMA_GLO_DEFAULT             (HSDMA_GLO_MULTI_DMA | \
++              HSDMA_GLO_RX_DMA | HSDMA_GLO_TX_DMA | HSDMA_BT_SIZE_32BYTES)
++
++/* HSDMA_REG_RST_CFG */
++#define HSDMA_RST_RX_SHIFT            16
++#define HSDMA_RST_TX_SHIFT            0
++
++/* HSDMA_REG_DELAY_INT */
++#define HSDMA_DELAY_INT_EN            BIT(15)
++#define HSDMA_DELAY_PEND_OFFSET               8
++#define HSDMA_DELAY_TIME_OFFSET               0
++#define HSDMA_DELAY_TX_OFFSET         16
++#define HSDMA_DELAY_RX_OFFSET         0
++
++#define HSDMA_DELAY_INIT(x)           (HSDMA_DELAY_INT_EN | \
++              ((x) << HSDMA_DELAY_PEND_OFFSET))
++#define HSDMA_DELAY(x)                        ((HSDMA_DELAY_INIT(x) << \
++              HSDMA_DELAY_TX_OFFSET) | HSDMA_DELAY_INIT(x))
++
++/* HSDMA_REG_INT_STATUS */
++#define HSDMA_INT_DELAY_RX_COH                BIT(31)
++#define HSDMA_INT_DELAY_RX_INT                BIT(30)
++#define HSDMA_INT_DELAY_TX_COH                BIT(29)
++#define HSDMA_INT_DELAY_TX_INT                BIT(28)
++#define HSDMA_INT_RX_MASK             0x3
++#define HSDMA_INT_RX_SHIFT            16
++#define HSDMA_INT_RX_Q0                       BIT(16)
++#define HSDMA_INT_TX_MASK             0xf
++#define HSDMA_INT_TX_SHIFT            0
++#define HSDMA_INT_TX_Q0                       BIT(0)
++
++/* tx/rx dma desc flags */
++#define HSDMA_PLEN_MASK                       0x3fff
++#define HSDMA_DESC_DONE                       BIT(31)
++#define HSDMA_DESC_LS0                        BIT(30)
++#define HSDMA_DESC_PLEN0(_x)          (((_x) & HSDMA_PLEN_MASK) << 16)
++#define HSDMA_DESC_TAG                        BIT(15)
++#define HSDMA_DESC_LS1                        BIT(14)
++#define HSDMA_DESC_PLEN1(_x)          ((_x) & HSDMA_PLEN_MASK)
++
++/* align 4 bytes -- NOTE(review): copy_align is a log2 shift, so 3 asks for 8-byte alignment; confirm */
++#define HSDMA_ALIGN_SIZE              3
++/* align size 128bytes */
++#define HSDMA_MAX_PLEN                        0x3f80
++
++/*
++ * One entry of the tx/rx descriptor rings whose base addresses are
++ * programmed into HSDMA_REG_TX_BASE / HSDMA_REG_RX_BASE.
++ */
++struct hsdma_desc {
++      u32 addr0;      /* bus address of the first buffer */
++      u32 flags;      /* HSDMA_DESC_* bits plus the PLEN0/PLEN1 lengths */
++      u32 addr1;      /* bus address of the second buffer (tx ring only in this driver) */
++      u32 unused;
++};
++
++/* Software description of one memory-to-memory copy request. */
++struct mtk_hsdma_sg {
++      dma_addr_t src_addr;    /* bus address to copy from */
++      dma_addr_t dst_addr;    /* bus address to copy to */
++      u32 len;                /* number of bytes to copy */
++};
++
++/* Driver-side transfer descriptor wrapped around a virt-dma descriptor. */
++struct mtk_hsdma_desc {
++      struct virt_dma_desc vdesc;
++      /* number of HSDMA_MAX_PLEN sized pieces sg[0] is split into */
++      unsigned int num_sgs;
++      /* only sg[0] is used; allocations rely on it being part of sizeof() */
++      struct mtk_hsdma_sg sg[1];
++};
++
++/* Per-channel state: the virt-dma channel plus its hardware rings. */
++struct mtk_hsdma_chan {
++      struct virt_dma_chan vchan;
++      unsigned int id;
++      /* dma handle of the single allocation holding tx_ring then rx_ring */
++      dma_addr_t desc_addr;
++      /* value last written to HSDMA_REG_TX_CTX */
++      int tx_idx;
++      /* value last written to HSDMA_REG_RX_CRX */
++      int rx_idx;
++      struct hsdma_desc *tx_ring;
++      struct hsdma_desc *rx_ring;
++      /* descriptor currently owned by the hardware, NULL when idle */
++      struct mtk_hsdma_desc *desc;
++      /* rx pieces of the current descriptor completed so far */
++      unsigned int next_sg;
++};
++
++/* Device-wide state of one HSDMA controller. */
++struct mtk_hsdam_engine {
++      struct dma_device ddev;
++      struct device_dma_parameters dma_parms;
++      /* register window, accessed through mtk_hsdma_read()/mtk_hsdma_write() */
++      void __iomem *base;
++      struct tasklet_struct task;
++      /* bitmask, indexed by channel id, of channels with a descriptor to start */
++      volatile unsigned long chan_issued;
++
++      /* the single channel; sizeof() of this struct is relied on to include it */
++      struct mtk_hsdma_chan chan[1];
++};
++
++/* Return the engine whose embedded dma_device @chan is registered on. */
++static inline struct mtk_hsdam_engine *mtk_hsdma_chan_get_dev(
++              struct mtk_hsdma_chan *chan)
++{
++      struct dma_device *dd = chan->vchan.chan.device;
++
++      return container_of(dd, struct mtk_hsdam_engine, ddev);
++}
++
++/* Map a generic dma_chan back to the driver channel that embeds it. */
++static inline struct mtk_hsdma_chan *to_mtk_hsdma_chan(struct dma_chan *c)
++{
++      struct virt_dma_chan *vc = to_virt_chan(c);
++
++      return container_of(vc, struct mtk_hsdma_chan, vchan);
++}
++
++/* Map a virt-dma descriptor back to the driver descriptor that embeds it. */
++static inline struct mtk_hsdma_desc *to_mtk_hsdma_desc(
++              struct virt_dma_desc *vdesc)
++{
++      struct mtk_hsdma_desc *desc;
++
++      desc = container_of(vdesc, struct mtk_hsdma_desc, vdesc);
++
++      return desc;
++}
++
++/* Read the 32-bit register at byte offset @reg from the engine base. */
++static inline u32 mtk_hsdma_read(struct mtk_hsdam_engine *hsdma, u32 reg)
++{
++      void __iomem *addr = hsdma->base + reg;
++
++      return readl(addr);
++}
++
++/* Write @val to the 32-bit register at byte offset @reg from the engine base. */
++static inline void mtk_hsdma_write(struct mtk_hsdam_engine *hsdma,
++              unsigned reg, u32 val)
++{
++      void __iomem *addr = hsdma->base + reg;
++
++      writel(val, addr);
++}
++
++/*
++ * Rewind the software ring indexes of @chan, publish them to the CPU index
++ * registers, then write the channel's tx bit and rx bit (in that order) to
++ * HSDMA_REG_RST_CFG.
++ */
++static void mtk_hsdma_reset_chan(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      const unsigned int tx_bit = chan->id + HSDMA_RST_TX_SHIFT;
++      const unsigned int rx_bit = chan->id + HSDMA_RST_RX_SHIFT;
++
++      /* tx restarts at slot 0, rx at the last slot of the ring */
++      chan->tx_idx = 0;
++      chan->rx_idx = HSDMA_DESCS_NUM - 1;
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CTX, chan->tx_idx);
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CRX, chan->rx_idx);
++
++      mtk_hsdma_write(hsdma, HSDMA_REG_RST_CFG, 0x1 << tx_bit);
++      mtk_hsdma_write(hsdma, HSDMA_REG_RST_CFG, 0x1 << rx_bit);
++}
++
++/*
++ * Debug helper: print the ring, info, global config, delay and interrupt
++ * registers through dev_dbg(). The register reads are kept as dev_dbg()
++ * arguments so they are only performed when the message is emitted.
++ */
++static void hsdma_dump_reg(struct mtk_hsdam_engine *hsdma)
++{
++      struct device *dev = hsdma->ddev.dev;
++
++      dev_dbg(dev, "tbase %08x, tcnt %08x, tctx %08x, tdtx: %08x, "
++                      "rbase %08x, rcnt %08x, rctx %08x, rdtx %08x\n",
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_BASE),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_CNT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_CTX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_TX_DTX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_BASE),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_CNT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_CRX),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_RX_DRX));
++
++      dev_dbg(dev, "info %08x, glo %08x, delay %08x, "
++                      "intr_stat %08x, intr_mask %08x\n",
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INFO),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_GLO_CFG),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_DELAY_INT),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INT_STATUS),
++                      mtk_hsdma_read(hsdma, HSDMA_REG_INT_MASK));
++}
++
++/*
++ * Debug helper: print the software ring indexes of @chan followed by the
++ * contents of every tx/rx descriptor slot through dev_dbg().
++ */
++static void hsdma_dump_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct device *dev = hsdma->ddev.dev;
++      int slot;
++
++      dev_dbg(dev, "tx idx: %d, rx idx: %d\n", chan->tx_idx, chan->rx_idx);
++
++      for (slot = 0; slot < HSDMA_DESCS_NUM; slot++) {
++              struct hsdma_desc *txd = &chan->tx_ring[slot];
++              struct hsdma_desc *rxd = &chan->rx_ring[slot];
++
++              dev_dbg(dev, "%d tx addr0: %08x, flags %08x, "
++                              "tx addr1: %08x, rx addr0 %08x, flags %08x\n",
++                              slot, txd->addr0, txd->flags,
++                              txd->addr1, rxd->addr0, rxd->flags);
++      }
++}
++
++/*
++ * Bring the engine back to its freshly initialised state: stop DMA, mask
++ * interrupts, re-initialise both descriptor rings, reset the channel, then
++ * re-enable the rx interrupt and DMA with the default configuration.
++ */
++static void mtk_hsdma_reset(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct hsdma_desc *d;
++      int n;
++
++      /* stop the dma engine first, then mask every interrupt */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, 0);
++
++      /* every tx slot goes back to "last segment, done" with no buffer */
++      for (n = 0, d = chan->tx_ring; n < HSDMA_DESCS_NUM; n++, d++) {
++              d->addr0 = 0;
++              d->flags = HSDMA_DESC_LS0 | HSDMA_DESC_DONE;
++      }
++
++      /* every rx slot is cleared */
++      for (n = 0, d = chan->rx_ring; n < HSDMA_DESCS_NUM; n++, d++) {
++              d->addr0 = 0;
++              d->flags = 0;
++      }
++
++      /* rewind the ring indexes and reset the channel */
++      mtk_hsdma_reset_chan(hsdma, chan);
++
++      /* unmask the rx queue 0 interrupt, then restart dma */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, HSDMA_INT_RX_Q0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, HSDMA_GLO_DEFAULT);
++}
++
++/*
++ * dmaengine terminate_all hook: drop the current descriptor, free every
++ * queued descriptor, then busy-wait (up to 2000 ms) for the hardware to go
++ * idle. If it is still busy at the deadline the engine is reset.
++ *
++ * Always returns 0.
++ */
++static int mtk_hsdma_terminate_all(struct dma_chan *c)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdam_engine *hsdma = mtk_hsdma_chan_get_dev(chan);
++      const u32 busy = HSDMA_GLO_RX_BUSY | HSDMA_GLO_TX_BUSY;
++      unsigned long deadline;
++      LIST_HEAD(doomed);
++
++      /* detach everything from the channel while holding its lock */
++      spin_lock_bh(&chan->vchan.lock);
++      chan->desc = NULL;
++      clear_bit(chan->id, &hsdma->chan_issued);
++      vchan_get_all_descriptors(&chan->vchan, &doomed);
++      spin_unlock_bh(&chan->vchan.lock);
++
++      vchan_dma_desc_free_list(&chan->vchan, &doomed);
++
++      /* let a transfer that is already in flight drain */
++      deadline = jiffies + msecs_to_jiffies(2000);
++      for (;;) {
++              if (!(mtk_hsdma_read(hsdma, HSDMA_REG_GLO_CFG) & busy))
++                      break;
++
++              if (time_after_eq(jiffies, deadline)) {
++                      hsdma_dump_desc(hsdma, chan);
++                      mtk_hsdma_reset(hsdma, chan);
++                      dev_err(hsdma->ddev.dev, "timeout, reset it\n");
++                      break;
++              }
++
++              cpu_relax();
++      }
++
++      return 0;
++}
++
++/*
++ * Program the hardware rings for sg[0] of the channel's current descriptor
++ * and kick the transfer by writing the new tx index to HSDMA_REG_TX_CTX.
++ *
++ * The request is cut into pieces of at most HSDMA_MAX_PLEN bytes. Each tx
++ * ring slot carries two pieces (addr0/PLEN0 and addr1/PLEN1); each rx ring
++ * slot carries one piece. Always returns 0.
++ *
++ * NOTE(review): nothing in this function bounds num_sgs against
++ * HSDMA_DESCS_NUM, so a long enough request would wrap around the rings --
++ * confirm that callers limit the length.
++ */
++static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      dma_addr_t src, dst;
++      size_t len, tlen;
++      struct hsdma_desc *tx_desc, *rx_desc;
++      struct mtk_hsdma_sg *sg;
++      unsigned int i;
++      int rx_idx;
++
++      sg = &chan->desc->sg[0];
++      len = sg->len;
++      chan->desc->num_sgs = DIV_ROUND_UP(len, HSDMA_MAX_PLEN);
++
++      /*
++       * tx desc
++       *
++       * NOTE(review): tx_desc is only assigned inside the loop (on even
++       * iterations), so the code below relies on num_sgs being at least 1,
++       * i.e. on a non-zero length.
++       */
++      src = sg->src_addr;
++      for (i = 0; i < chan->desc->num_sgs; i++) {
++              if (len > HSDMA_MAX_PLEN)
++                      tlen = HSDMA_MAX_PLEN;
++              else
++                      tlen = len;
++
++              if (i & 0x1) {
++                      /* odd piece: second buffer of the slot taken last time */
++                      tx_desc->addr1 = src;
++                      tx_desc->flags |= HSDMA_DESC_PLEN1(tlen);
++              } else {
++                      /* even piece: take a new slot; this overwrites its flags */
++                      tx_desc = &chan->tx_ring[chan->tx_idx];
++                      tx_desc->addr0 = src;
++                      tx_desc->flags = HSDMA_DESC_PLEN0(tlen);
++
++                      /* update index */
++                      chan->tx_idx = HSDMA_NEXT_DESC(chan->tx_idx);
++              }
++
++              src += tlen;
++              len -= tlen;
++      }
++      /* i is now num_sgs: an odd count means the last piece sits in buffer 0 */
++      if (i & 0x1)
++              tx_desc->flags |= HSDMA_DESC_LS0;
++      else
++              tx_desc->flags |= HSDMA_DESC_LS1;
++
++      /*
++       * rx desc: one slot per piece, starting after chan->rx_idx. Only the
++       * local rx_idx advances; chan->rx_idx is not modified here.
++       */
++      rx_idx = HSDMA_NEXT_DESC(chan->rx_idx);
++      len = sg->len;
++      dst = sg->dst_addr;
++      for (i = 0; i < chan->desc->num_sgs; i++) {
++              rx_desc = &chan->rx_ring[rx_idx];
++              if (len > HSDMA_MAX_PLEN)
++                      tlen = HSDMA_MAX_PLEN;
++              else
++                      tlen = len;
++
++              rx_desc->addr0 = dst;
++              rx_desc->flags = HSDMA_DESC_PLEN0(tlen);
++
++              dst += tlen;
++              len -= tlen;
++
++              /* update index */
++              rx_idx = HSDMA_NEXT_DESC(rx_idx);
++      }
++
++      /* make sure desc and index all up to date */
++      wmb();
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CTX, chan->tx_idx);
++
++      return 0;
++}
++
++/*
++ * Make the next issued virt-dma descriptor the channel's current one.
++ *
++ * Returns 1 when a descriptor was picked up (and next_sg was rewound),
++ * 0 when there was none, in which case chan->desc is set to NULL.
++ */
++static int gdma_next_desc(struct mtk_hsdma_chan *chan)
++{
++      struct virt_dma_desc *vd = vchan_next_desc(&chan->vchan);
++
++      if (vd) {
++              chan->desc = to_mtk_hsdma_desc(vd);
++              chan->next_sg = 0;
++              return 1;
++      }
++
++      chan->desc = NULL;
++      return 0;
++}
++
++/*
++ * Completion handling for @chan, run under the vchan lock: once every piece
++ * of the current descriptor has been received, complete its cookie and pick
++ * up the next descriptor. If one is found, the channel is flagged in
++ * chan_issued so that it gets started.
++ */
++static void mtk_hsdma_chan_done(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct mtk_hsdma_desc *cur;
++      int have_next = 0;
++
++      spin_lock_bh(&chan->vchan.lock);
++
++      cur = chan->desc;
++      if (unlikely(!cur)) {
++              dev_dbg(hsdma->ddev.dev, "no desc to complete\n");
++      } else if (chan->next_sg == cur->num_sgs) {
++              list_del(&cur->vdesc.node);
++              vchan_cookie_complete(&cur->vdesc);
++              have_next = gdma_next_desc(chan);
++      }
++
++      if (have_next)
++              set_bit(chan->id, &hsdma->chan_issued);
++
++      spin_unlock_bh(&chan->vchan.lock);
++}
++
++/*
++ * Interrupt handler. Returns IRQ_NONE when no status bit is set; otherwise
++ * schedules the tasklet if rx queue 0 raised the interrupt, writes the
++ * status value back to clear it and returns IRQ_HANDLED.
++ */
++static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
++{
++      struct mtk_hsdam_engine *hsdma = devid;
++      u32 pending = mtk_hsdma_read(hsdma, HSDMA_REG_INT_STATUS);
++
++      if (unlikely(pending == 0))
++              return IRQ_NONE;
++
++      if (unlikely(!(pending & HSDMA_INT_RX_Q0)))
++              dev_dbg(hsdma->ddev.dev, "unhandle irq status %08x\n",
++                              pending);
++      else
++              tasklet_schedule(&hsdma->task);
++
++      /* write the bits that were read back to clear them */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_STATUS, pending);
++
++      return IRQ_HANDLED;
++}
++
++/*
++ * dmaengine issue_pending hook: move submitted descriptors to the issued
++ * list and, when the channel has no current descriptor, pick up the next
++ * one, flag the channel in chan_issued and schedule the tasklet.
++ */
++static void mtk_hsdma_issue_pending(struct dma_chan *c)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdam_engine *hsdma = mtk_hsdma_chan_get_dev(chan);
++
++      spin_lock_bh(&chan->vchan.lock);
++
++      /* vchan_issue_pending() must run even when a transfer is in flight */
++      if (!vchan_issue_pending(&chan->vchan) || chan->desc)
++              goto unlock;
++
++      if (!gdma_next_desc(chan)) {
++              dev_dbg(hsdma->ddev.dev, "no desc to issue\n");
++              goto unlock;
++      }
++
++      set_bit(chan->id, &hsdma->chan_issued);
++      tasklet_schedule(&hsdma->task);
++
++unlock:
++      spin_unlock_bh(&chan->vchan.lock);
++}
++
++/*
++ * dmaengine prep_dma_memcpy hook: allocate a driver descriptor describing a
++ * copy of @len bytes from @src to @dest and hand it to virt-dma.
++ *
++ * Returns the prepared tx descriptor, or NULL when @len is zero or the
++ * allocation fails.
++ */
++static struct dma_async_tx_descriptor * mtk_hsdma_prep_dma_memcpy(
++              struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
++              size_t len, unsigned long flags)
++{
++      struct mtk_hsdma_chan *chan = to_mtk_hsdma_chan(c);
++      struct mtk_hsdma_desc *desc;
++      struct mtk_hsdma_sg *seg;
++
++      /* size_t is unsigned, so only a zero length can be rejected here */
++      if (!len)
++              return NULL;
++
++      desc = kzalloc(sizeof(*desc), GFP_ATOMIC);
++      if (desc == NULL) {
++              dev_err(c->device->dev, "alloc memcpy decs error\n");
++              return NULL;
++      }
++
++      seg = &desc->sg[0];
++      seg->src_addr = src;
++      seg->dst_addr = dest;
++      seg->len = len;
++
++      return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
++}
++
++/* dmaengine tx_status hook: report the cookie state only. */
++static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
++              dma_cookie_t cookie, struct dma_tx_state *state)
++{
++      enum dma_status st;
++
++      st = dma_cookie_status(c, cookie, state);
++
++      return st;
++}
++
++/* dmaengine free_chan_resources hook: release the virt-dma channel resources. */
++static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
++{
++      struct virt_dma_chan *vc = to_virt_chan(c);
++
++      vchan_free_chan_resources(vc);
++}
++
++/* virt-dma desc_free callback: free the driver descriptor embedding @vdesc. */
++static void mtk_hsdma_desc_free(struct virt_dma_desc *vdesc)
++{
++      kfree(to_mtk_hsdma_desc(vdesc));
++}
++
++/*
++ * Tasklet tx half: if channel 0 is flagged in chan_issued, clear the flag
++ * and start its current descriptor (if it has one).
++ */
++static void mtk_hsdma_tx(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan = &hsdma->chan[0];
++
++      if (!test_and_clear_bit(0, &hsdma->chan_issued))
++              return;
++
++      if (!chan->desc) {
++              dev_dbg(hsdma->ddev.dev,"chan 0 no desc to issue\n");
++              return;
++      }
++
++      mtk_hsdma_start_transfer(hsdma, chan);
++}
++
++/*
++ * Tasklet rx half: work out how many rx slots channel 0 has gained since
++ * the last pass from HSDMA_REG_RX_DRX, account for them, publish the new
++ * index through HSDMA_REG_RX_CRX and run completion handling.
++ */
++static void mtk_hsdma_rx(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan = &hsdma->chan[0];
++      int expected, hw_idx, done;
++
++      expected = HSDMA_NEXT_DESC(chan->rx_idx);
++      hw_idx = mtk_hsdma_read(hsdma, HSDMA_REG_RX_DRX);
++
++      done = (hw_idx - expected) & HSDMA_DESCS_MASK;
++      if (done == 0)
++              return;
++
++      chan->next_sg += done;
++      chan->rx_idx = (chan->rx_idx + done) & HSDMA_DESCS_MASK;
++
++      /* order the updates above before the register write */
++      wmb();
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CRX, chan->rx_idx);
++
++      mtk_hsdma_chan_done(hsdma, chan);
++}
++
++/* Tasklet body: handle finished rx work first, then start pending tx. */
++static void mtk_hsdma_tasklet(unsigned long arg)
++{
++      struct mtk_hsdam_engine *engine = (struct mtk_hsdam_engine *)arg;
++
++      mtk_hsdma_rx(engine);
++      mtk_hsdma_tx(engine);
++}
++
++/*
++ * Allocate one coherent buffer holding the tx ring followed by the rx ring
++ * (HSDMA_DESCS_NUM entries each) and mark every tx slot "last segment,
++ * done".
++ *
++ * Returns 0 on success, -ENOMEM when the allocation fails.
++ */
++static int mtk_hsdam_alloc_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      struct hsdma_desc *ring;
++      int slot;
++
++      ring = dma_alloc_coherent(hsdma->ddev.dev,
++                      2 * HSDMA_DESCS_NUM * sizeof(*ring),
++                      &chan->desc_addr, GFP_ATOMIC | __GFP_ZERO);
++      /* stored even on failure, so tx_ring is NULL when nothing was allocated */
++      chan->tx_ring = ring;
++      if (!ring)
++              return -ENOMEM;
++
++      /* the rx ring lives directly behind the tx ring */
++      chan->rx_ring = ring + HSDMA_DESCS_NUM;
++
++      for (slot = 0; slot < HSDMA_DESCS_NUM; slot++)
++              ring[slot].flags = HSDMA_DESC_LS0 | HSDMA_DESC_DONE;
++
++      return 0;
++}
++
++/*
++ * Free the combined tx/rx ring buffer of @chan, if there is one, and clear
++ * both ring pointers.
++ */
++static void mtk_hsdam_free_desc(struct mtk_hsdam_engine *hsdma,
++              struct mtk_hsdma_chan *chan)
++{
++      if (!chan->tx_ring)
++              return;
++
++      dma_free_coherent(hsdma->ddev.dev,
++                      2 * HSDMA_DESCS_NUM * sizeof(*chan->tx_ring),
++                      chan->tx_ring, chan->desc_addr);
++      chan->tx_ring = NULL;
++      chan->rx_ring = NULL;
++}
++
++/*
++ * One-time hardware setup: allocate the descriptor rings of channel 0,
++ * program their base addresses and sizes, reset the channel, unmask the rx
++ * interrupt and enable DMA. The ring counts reported by HSDMA_REG_INFO are
++ * logged.
++ *
++ * Returns 0 on success or the error from the ring allocation.
++ */
++static int mtk_hsdma_init(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan = &hsdma->chan[0];
++      u32 info;
++      int err;
++
++      err = mtk_hsdam_alloc_desc(hsdma, chan);
++      if (err)
++              return err;
++
++      /* tx ring sits at the start of the allocation */
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_BASE, chan->desc_addr);
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CNT, HSDMA_DESCS_NUM);
++
++      /* rx ring follows the HSDMA_DESCS_NUM tx descriptors */
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_BASE, chan->desc_addr +
++                      (sizeof(struct hsdma_desc) * HSDMA_DESCS_NUM));
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CNT, HSDMA_DESCS_NUM);
++
++      mtk_hsdma_reset_chan(hsdma, chan);
++
++      /* unmask the rx queue 0 interrupt, then switch dma on */
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, HSDMA_INT_RX_Q0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, HSDMA_GLO_DEFAULT);
++
++      /* report what the hardware says about itself */
++      info = mtk_hsdma_read(hsdma, HSDMA_REG_INFO);
++      dev_info(hsdma->ddev.dev, "rx: %d, tx: %d\n",
++                      (info >> HSDMA_INFO_RX_SHIFT) & HSDMA_INFO_RX_MASK,
++                      (info >> HSDMA_INFO_TX_SHIFT) & HSDMA_INFO_TX_MASK);
++
++      hsdma_dump_reg(hsdma);
++
++      return 0;
++}
++
++/*
++ * Undo mtk_hsdma_init(): stop DMA, mask interrupts, free the rings of
++ * channel 0, clear the ring base/size registers and reset the channel.
++ */
++static void mtk_hsdma_uninit(struct mtk_hsdam_engine *hsdma)
++{
++      struct mtk_hsdma_chan *chan = &hsdma->chan[0];
++
++      /* stop the dma engine first, then mask every interrupt */
++      mtk_hsdma_write(hsdma, HSDMA_REG_GLO_CFG, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_INT_MASK, 0);
++
++      mtk_hsdam_free_desc(hsdma, chan);
++
++      /* forget the tx ring */
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_BASE, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_TX_CNT, 0);
++
++      /* forget the rx ring */
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_BASE, 0);
++      mtk_hsdma_write(hsdma, HSDMA_REG_RX_CNT, 0);
++
++      mtk_hsdma_reset_chan(hsdma, chan);
++}
++
++/* Device-tree compatible strings handled by this driver. */
++static const struct of_device_id mtk_hsdma_of_match[] = {
++      { .compatible = "mediatek,mt7621-hsdma" },
++      { },    /* sentinel */
++};
++
++/*
++ * mtk_hsdma_probe() - bind the driver to a matching platform device.
++ * @pdev: platform device being probed
++ *
++ * Sets a 32 bit DMA mask, allocates the engine state, maps the register
++ * window, requests the interrupt, fills in the dmaengine callbacks for a
++ * single memcpy capable channel, initialises the hardware and registers
++ * the device with the dmaengine core and the OF DMA helpers.
++ *
++ * The engine state, the register mapping and the interrupt are device
++ * managed. Once mtk_hsdma_init() has succeeded, every later failure
++ * unwinds it again through mtk_hsdma_uninit().
++ *
++ * Return: 0 on success, a negative errno otherwise.
++ */
++static int mtk_hsdma_probe(struct platform_device *pdev)
++{
++      const struct of_device_id *match;
++      struct mtk_hsdma_chan *chan;
++      struct mtk_hsdam_engine *hsdma;
++      struct dma_device *dd;
++      struct resource *res;
++      int ret;
++      int irq;
++      void __iomem *base;
++
++      ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
++      if (ret)
++              return ret;
++
++      match = of_match_device(mtk_hsdma_of_match, &pdev->dev);
++      if (!match)
++              return -EINVAL;
++
++      hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
++      if (!hsdma) {
++              dev_err(&pdev->dev, "alloc dma device failed\n");
++              /* an allocation failure is -ENOMEM, not -EINVAL */
++              return -ENOMEM;
++      }
++
++      res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++      base = devm_ioremap_resource(&pdev->dev, res);
++      if (IS_ERR(base))
++              return PTR_ERR(base);
++      hsdma->base = base + HSDMA_BASE_OFFSET;
++      tasklet_init(&hsdma->task, mtk_hsdma_tasklet, (unsigned long)hsdma);
++
++      irq = platform_get_irq(pdev, 0);
++      if (irq < 0) {
++              dev_err(&pdev->dev, "failed to get irq\n");
++              /* hand the real reason (e.g. -EPROBE_DEFER) to the caller */
++              return irq;
++      }
++      ret = devm_request_irq(&pdev->dev, irq, mtk_hsdma_irq,
++                      0, dev_name(&pdev->dev), hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to request irq\n");
++              return ret;
++      }
++
++      /* the result of the reset is deliberately not checked here */
++      device_reset(&pdev->dev);
++
++      dd = &hsdma->ddev;
++      dma_cap_set(DMA_MEMCPY, dd->cap_mask);
++      dd->copy_align = HSDMA_ALIGN_SIZE;
++      dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
++      dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
++      dd->device_terminate_all = mtk_hsdma_terminate_all;
++      dd->device_tx_status = mtk_hsdma_tx_status;
++      dd->device_issue_pending = mtk_hsdma_issue_pending;
++      dd->dev = &pdev->dev;
++      dd->dev->dma_parms = &hsdma->dma_parms;
++      dma_set_max_seg_size(dd->dev, HSDMA_MAX_PLEN);
++      INIT_LIST_HEAD(&dd->channels);
++
++      /* only channel 0 is set up */
++      chan = &hsdma->chan[0];
++      chan->id = 0;
++      chan->vchan.desc_free = mtk_hsdma_desc_free;
++      vchan_init(&chan->vchan, dd);
++
++      /* init hardware */
++      ret = mtk_hsdma_init(hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to alloc ring descs\n");
++              return ret;
++      }
++
++      ret = dma_async_device_register(dd);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register dma device\n");
++              goto err_uninit;
++      }
++
++      ret = of_dma_controller_register(pdev->dev.of_node,
++                      of_dma_xlate_by_chan_id, hsdma);
++      if (ret) {
++              dev_err(&pdev->dev, "failed to register of dma controller\n");
++              goto err_unregister;
++      }
++
++      platform_set_drvdata(pdev, hsdma);
++
++      return 0;
++
++err_unregister:
++      dma_async_device_unregister(dd);
++err_uninit:
++      /* do not leave the engine enabled or the descriptor ring allocated */
++      mtk_hsdma_uninit(hsdma);
++      return ret;
++}
++
++/*
++ * mtk_hsdma_remove() - unbind the driver from its platform device.
++ * @pdev: platform device being removed
++ *
++ * Tears down in the reverse order of mtk_hsdma_probe(): the OF DMA
++ * controller and the dmaengine device are unregistered first, and only
++ * then is the hardware shut down and its descriptor ring released, so the
++ * ring is never freed while the device is still registered.
++ *
++ * Return: always 0.
++ */
++static int mtk_hsdma_remove(struct platform_device *pdev)
++{
++      struct mtk_hsdam_engine *hsdma = platform_get_drvdata(pdev);
++
++      of_dma_controller_free(pdev->dev.of_node);
++      dma_async_device_unregister(&hsdma->ddev);
++
++      mtk_hsdma_uninit(hsdma);
++
++      /* the tasklet set up in probe must not stay pending past removal */
++      tasklet_kill(&hsdma->task);
++
++      return 0;
++}
++
++/*
++ * Platform driver glue: matched through mtk_hsdma_of_match and registered
++ * with module_platform_driver().
++ */
++static struct platform_driver mtk_hsdma_driver = {
++      .driver = {
++              .of_match_table = mtk_hsdma_of_match,
++              .name = "hsdma-mt7621",
++      },
++      .remove = mtk_hsdma_remove,
++      .probe = mtk_hsdma_probe,
++};
++module_platform_driver(mtk_hsdma_driver);
++
++/* module metadata */
++MODULE_AUTHOR("Michael Lee <igvtee@gmail.com>");
++MODULE_DESCRIPTION("MTK HSDMA driver");
++MODULE_LICENSE("GPL v2");