1 From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
2 Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects a few subsystems)
3 Date: Mon, 25 Apr 2016 15:35:05 +0300
4
5 This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html)
6 contains a number of mostly minor fixes and cleanups for the DW DMA driver. A
7 couple of them affect the DT binding, so existing device trees may need to be
8 updated to maintain compatibility (the old format is still supported, though).
9 The rest should be relatively straightforward.
10
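For illustration, a minimal device-tree sketch of the binding change (node
name, address and cell values are made up, not taken from a real board). The
new data-width property gives each AHB master's data bus width in bytes; the
deprecated data_width property encoded it as log2 of the byte width and is
still parsed as a fallback. The dma-masters property is now validated as well:

	dma: dma-controller@fc000000 {
		compatible = "snps,dma-spear1340";
		/* reg, interrupts, #dma-cells, ... */
		dma-channels = <8>;
		dma-masters = <2>;

		/* New style: bus width in bytes, one cell per master */
		data-width = <8 8>;

		/* Deprecated equivalent: log2(bytes), i.e. 3 -> 8 bytes */
		/* data_width = <3 3>; */
	};
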
11 This version has been tested on the following bare metal platforms:
12 - ATNGW100 (avr32 based platform) with dmatest
13 - Sam460ex (powerpc 44x based platform) with SATA
14 - Intel Braswell with UART
15 - Intel Galileo (Intel Quark based platform) with UART
16
17 (The SATA driver and Intel Galileo UART support are based on this series and
18  were just recently published for review.)
19
20 Vinod, there are a few patch sets developed on top of this one, so the idea is
21 to keep this in an immutable branch / tag.
22
23 Changes since v5:
24 - fixed an issue found by the kbuild bot
25
26 Changes since v4:
27 - send proper set of patches
28 - add changelog
29
30 Changes since v3:
31 - add patch 1 to check value of dma-masters property
32 - drop the upstreamed patches
33 - update patch 2 to keep an array for data-width property as well
34
35 Changes since v2:
36 - add patch 1 to fix master selection which was broken for long time
37 - remove "use field-by-field initialization" patch since like Mans metioned in
38   has mostly no value and even might increase error prone
39 - rebase on top of recent linux-next
40 - wide testing on several platforms
41
42 Changes since v1:
43 - zeroing struct dw_dma_slave before use
44 - fall back to old data_width property if data-width is not found
45 - append tags for a few patches
46 - correct title of cover letter
47 - rebase on top of recent linux-next
48
49 Andy Shevchenko (4):
50   dmaengine: dw: platform: check nr_masters to be non-zero
51   dmaengine: dw: revisit data_width property
52   dmaengine: dw: keep entire platform data in struct dw_dma
53   dmaengine: dw: pass platform data via struct dw_dma_chip
54
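The last patch also changes the glue-driver contract: platform data now
travels inside struct dw_dma_chip rather than as a second argument to
dw_dma_probe() (see the pci.c and platform.c hunks below). A minimal sketch
of a glue probe after this series; my_glue_probe and its arguments are
hypothetical, only the chip setup and the dw_dma_probe() call reflect the
patches:

	#include <linux/device.h>
	#include <linux/slab.h>
	#include <linux/dma/dw.h>

	static int my_glue_probe(struct device *dev, void __iomem *regs, int irq,
				 const struct dw_dma_platform_data *pdata)
	{
		struct dw_dma_chip *chip;

		chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
		if (!chip)
			return -ENOMEM;

		chip->dev = dev;
		chip->regs = regs;
		chip->irq = irq;
		/* NULL here means: autoconfigure from the DW_PARAMS register */
		chip->pdata = pdata;

		/* before this series: dw_dma_probe(chip, pdata); */
		return dw_dma_probe(chip);
	}
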
55  Documentation/devicetree/bindings/dma/snps-dma.txt |  6 +-
56  arch/arc/boot/dts/abilis_tb10x.dtsi                |  2 +-
57  arch/arm/boot/dts/spear13xx.dtsi                   |  4 +-
58  drivers/ata/sata_dwc_460ex.c                       |  2 +-
59  drivers/dma/dw/core.c                              | 75 ++++++++--------------
60  drivers/dma/dw/pci.c                               |  5 +-
61  drivers/dma/dw/platform.c                          | 32 +++++----
62  drivers/dma/dw/regs.h                              |  5 +-
63  include/linux/dma/dw.h                             |  5 +-
64  include/linux/platform_data/dma-dw.h               |  4 +-
65  sound/soc/intel/common/sst-firmware.c              |  2 +-
66  11 files changed, 64 insertions(+), 78 deletions(-)
67
68 --- a/drivers/dma/dw/core.c
69 +++ b/drivers/dma/dw/core.c
70 @@ -45,22 +45,19 @@
71                         DW_DMA_MSIZE_16;                        \
72                 u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :       \
73                         DW_DMA_MSIZE_16;                        \
74 +               u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ?         \
75 +                       _dwc->p_master : _dwc->m_master;                \
76 +               u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ?         \
77 +                       _dwc->p_master : _dwc->m_master;                \
78                                                                 \
79                 (DWC_CTLL_DST_MSIZE(_dmsize)                    \
80                  | DWC_CTLL_SRC_MSIZE(_smsize)                  \
81                  | DWC_CTLL_LLP_D_EN                            \
82                  | DWC_CTLL_LLP_S_EN                            \
83 -                | DWC_CTLL_DMS(_dwc->dst_master)               \
84 -                | DWC_CTLL_SMS(_dwc->src_master));             \
85 +                | DWC_CTLL_DMS(_dms)                           \
86 +                | DWC_CTLL_SMS(_sms));                         \
87         })
88  
89 -/*
90 - * Number of descriptors to allocate for each channel. This should be
91 - * made configurable somehow; preferably, the clients (at least the
92 - * ones using slave transfers) should be able to give us a hint.
93 - */
94 -#define NR_DESCS_PER_CHANNEL   64
95 -
96  /* The set of bus widths supported by the DMA controller */
97  #define DW_DMA_BUSWIDTHS                         \
98         BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED)       | \
99 @@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(
100         return to_dw_desc(dwc->active_list.next);
101  }
102  
103 -static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
104 +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
105  {
106 -       struct dw_desc *desc, *_desc;
107 -       struct dw_desc *ret = NULL;
108 -       unsigned int i = 0;
109 -       unsigned long flags;
110 +       struct dw_desc          *desc = txd_to_dw_desc(tx);
111 +       struct dw_dma_chan      *dwc = to_dw_dma_chan(tx->chan);
112 +       dma_cookie_t            cookie;
113 +       unsigned long           flags;
114  
115         spin_lock_irqsave(&dwc->lock, flags);
116 -       list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
117 -               i++;
118 -               if (async_tx_test_ack(&desc->txd)) {
119 -                       list_del(&desc->desc_node);
120 -                       ret = desc;
121 -                       break;
122 -               }
123 -               dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
124 -       }
125 +       cookie = dma_cookie_assign(tx);
126 +
127 +       /*
128 +        * REVISIT: We should attempt to chain as many descriptors as
129 +        * possible, perhaps even appending to those already submitted
130 +        * for DMA. But this is hard to do in a race-free manner.
131 +        */
132 +
133 +       list_add_tail(&desc->desc_node, &dwc->queue);
134         spin_unlock_irqrestore(&dwc->lock, flags);
135 +       dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
136 +                __func__, desc->txd.cookie);
137  
138 -       dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
139 +       return cookie;
140 +}
141  
142 -       return ret;
143 +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
144 +{
145 +       struct dw_dma *dw = to_dw_dma(dwc->chan.device);
146 +       struct dw_desc *desc;
147 +       dma_addr_t phys;
148 +
149 +       desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
150 +       if (!desc)
151 +               return NULL;
152 +
153 +       dwc->descs_allocated++;
154 +       INIT_LIST_HEAD(&desc->tx_list);
155 +       dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
156 +       desc->txd.tx_submit = dwc_tx_submit;
157 +       desc->txd.flags = DMA_CTRL_ACK;
158 +       desc->txd.phys = phys;
159 +       return desc;
160  }
161  
162 -/*
163 - * Move a descriptor, including any children, to the free list.
164 - * `desc' must not be on any lists.
165 - */
166  static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
167  {
168 -       unsigned long flags;
169 +       struct dw_dma *dw = to_dw_dma(dwc->chan.device);
170 +       struct dw_desc *child, *_next;
171  
172 -       if (desc) {
173 -               struct dw_desc *child;
174 +       if (unlikely(!desc))
175 +               return;
176  
177 -               spin_lock_irqsave(&dwc->lock, flags);
178 -               list_for_each_entry(child, &desc->tx_list, desc_node)
179 -                       dev_vdbg(chan2dev(&dwc->chan),
180 -                                       "moving child desc %p to freelist\n",
181 -                                       child);
182 -               list_splice_init(&desc->tx_list, &dwc->free_list);
183 -               dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
184 -               list_add(&desc->desc_node, &dwc->free_list);
185 -               spin_unlock_irqrestore(&dwc->lock, flags);
186 +       list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
187 +               list_del(&child->desc_node);
188 +               dma_pool_free(dw->desc_pool, child, child->txd.phys);
189 +               dwc->descs_allocated--;
190         }
191 +
192 +       dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
193 +       dwc->descs_allocated--;
194  }
195  
196  static void dwc_initialize(struct dw_dma_chan *dwc)
197 @@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma
198         u32 cfghi = DWC_CFGH_FIFO_MODE;
199         u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
200  
201 -       if (dwc->initialized == true)
202 +       if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
203                 return;
204  
205         cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
206 @@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma
207         channel_set_bit(dw, MASK.XFER, dwc->mask);
208         channel_set_bit(dw, MASK.ERROR, dwc->mask);
209  
210 -       dwc->initialized = true;
211 +       set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
212  }
213  
214  /*----------------------------------------------------------------------*/
215  
216 -static inline unsigned int dwc_fast_ffs(unsigned long long v)
217 -{
218 -       /*
219 -        * We can be a lot more clever here, but this should take care
220 -        * of the most common optimization.
221 -        */
222 -       if (!(v & 7))
223 -               return 3;
224 -       else if (!(v & 3))
225 -               return 2;
226 -       else if (!(v & 1))
227 -               return 1;
228 -       return 0;
229 -}
230 -
231  static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
232  {
233         dev_err(chan2dev(&dwc->chan),
234 @@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s
235          * Software emulation of LLP mode relies on interrupts to continue
236          * multi block transfer.
237          */
238 -       ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
239 +       ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
240  
241 -       channel_writel(dwc, SAR, desc->lli.sar);
242 -       channel_writel(dwc, DAR, desc->lli.dar);
243 +       channel_writel(dwc, SAR, lli_read(desc, sar));
244 +       channel_writel(dwc, DAR, lli_read(desc, dar));
245         channel_writel(dwc, CTL_LO, ctllo);
246 -       channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
247 +       channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
248         channel_set_bit(dw, CH_EN, dwc->mask);
249  
250         /* Move pointer to next descriptor */
251 @@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s
252  static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
253  {
254         struct dw_dma   *dw = to_dw_dma(dwc->chan.device);
255 +       u8              lms = DWC_LLP_LMS(dwc->m_master);
256         unsigned long   was_soft_llp;
257  
258         /* ASSERT:  channel is idle */
259 @@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch
260  
261                 dwc_initialize(dwc);
262  
263 -               dwc->residue = first->total_len;
264 +               first->residue = first->total_len;
265                 dwc->tx_node_active = &first->tx_list;
266  
267                 /* Submit first block */
268 @@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch
269  
270         dwc_initialize(dwc);
271  
272 -       channel_writel(dwc, LLP, first->txd.phys);
273 -       channel_writel(dwc, CTL_LO,
274 -                       DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
275 +       channel_writel(dwc, LLP, first->txd.phys | lms);
276 +       channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
277         channel_writel(dwc, CTL_HI, 0);
278         channel_set_bit(dw, CH_EN, dwc->mask);
279  }
280 @@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch
281         list_for_each_entry(child, &desc->tx_list, desc_node)
282                 async_tx_ack(&child->txd);
283         async_tx_ack(&desc->txd);
284 -
285 -       list_splice_init(&desc->tx_list, &dwc->free_list);
286 -       list_move(&desc->desc_node, &dwc->free_list);
287 -
288 -       dma_descriptor_unmap(txd);
289 +       dwc_desc_put(dwc, desc);
290         spin_unlock_irqrestore(&dwc->lock, flags);
291  
292         if (callback)
293 @@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct
294  
295                         head = &desc->tx_list;
296                         if (active != head) {
297 -                               /* Update desc to reflect last sent one */
298 -                               if (active != head->next)
299 -                                       desc = to_dw_desc(active->prev);
300 -
301 -                               dwc->residue -= desc->len;
302 +                               /* Update residue to reflect last sent descriptor */
303 +                               if (active == head->next)
304 +                                       desc->residue -= desc->len;
305 +                               else
306 +                                       desc->residue -= to_dw_desc(active->prev)->len;
307  
308                                 child = to_dw_desc(active);
309  
310 @@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct
311                         clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
312                 }
313  
314 -               dwc->residue = 0;
315 -
316                 spin_unlock_irqrestore(&dwc->lock, flags);
317  
318                 dwc_complete_all(dw, dwc);
319 @@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct
320         }
321  
322         if (list_empty(&dwc->active_list)) {
323 -               dwc->residue = 0;
324                 spin_unlock_irqrestore(&dwc->lock, flags);
325                 return;
326         }
327 @@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct
328  
329         list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
330                 /* Initial residue value */
331 -               dwc->residue = desc->total_len;
332 +               desc->residue = desc->total_len;
333  
334                 /* Check first descriptors addr */
335 -               if (desc->txd.phys == llp) {
336 +               if (desc->txd.phys == DWC_LLP_LOC(llp)) {
337                         spin_unlock_irqrestore(&dwc->lock, flags);
338                         return;
339                 }
340  
341                 /* Check first descriptors llp */
342 -               if (desc->lli.llp == llp) {
343 +               if (lli_read(desc, llp) == llp) {
344                         /* This one is currently in progress */
345 -                       dwc->residue -= dwc_get_sent(dwc);
346 +                       desc->residue -= dwc_get_sent(dwc);
347                         spin_unlock_irqrestore(&dwc->lock, flags);
348                         return;
349                 }
350  
351 -               dwc->residue -= desc->len;
352 +               desc->residue -= desc->len;
353                 list_for_each_entry(child, &desc->tx_list, desc_node) {
354 -                       if (child->lli.llp == llp) {
355 +                       if (lli_read(child, llp) == llp) {
356                                 /* Currently in progress */
357 -                               dwc->residue -= dwc_get_sent(dwc);
358 +                               desc->residue -= dwc_get_sent(dwc);
359                                 spin_unlock_irqrestore(&dwc->lock, flags);
360                                 return;
361                         }
362 -                       dwc->residue -= child->len;
363 +                       desc->residue -= child->len;
364                 }
365  
366                 /*
367 @@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct
368         spin_unlock_irqrestore(&dwc->lock, flags);
369  }
370  
371 -static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
372 +static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
373  {
374         dev_crit(chan2dev(&dwc->chan), "  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
375 -                lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
376 +                lli_read(desc, sar),
377 +                lli_read(desc, dar),
378 +                lli_read(desc, llp),
379 +                lli_read(desc, ctlhi),
380 +                lli_read(desc, ctllo));
381  }
382  
383  static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
384 @@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d
385          */
386         dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
387                                        "  cookie: %d\n", bad_desc->txd.cookie);
388 -       dwc_dump_lli(dwc, &bad_desc->lli);
389 +       dwc_dump_lli(dwc, bad_desc);
390         list_for_each_entry(child, &bad_desc->tx_list, desc_node)
391 -               dwc_dump_lli(dwc, &child->lli);
392 +               dwc_dump_lli(dwc, child);
393  
394         spin_unlock_irqrestore(&dwc->lock, flags);
395  
396 @@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_
397          */
398         if (unlikely(status_err & dwc->mask) ||
399                         unlikely(status_xfer & dwc->mask)) {
400 -               int i;
401 +               unsigned int i;
402  
403                 dev_err(chan2dev(&dwc->chan),
404                         "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
405 @@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_
406                 dma_writel(dw, CLEAR.XFER, dwc->mask);
407  
408                 for (i = 0; i < dwc->cdesc->periods; i++)
409 -                       dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
410 +                       dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
411  
412                 spin_unlock_irqrestore(&dwc->lock, flags);
413         }
414 @@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long
415         u32 status_block;
416         u32 status_xfer;
417         u32 status_err;
418 -       int i;
419 +       unsigned int i;
420  
421         status_block = dma_readl(dw, RAW.BLOCK);
422         status_xfer = dma_readl(dw, RAW.XFER);
423 @@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long
424  static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
425  {
426         struct dw_dma *dw = dev_id;
427 -       u32 status = dma_readl(dw, STATUS_INT);
428 +       u32 status;
429 +
430 +       /* Check if we have any interrupt from the DMAC which is not in use */
431 +       if (!dw->in_use)
432 +               return IRQ_NONE;
433  
434 +       status = dma_readl(dw, STATUS_INT);
435         dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
436  
437         /* Check if we have any interrupt from the DMAC */
438 -       if (!status || !dw->in_use)
439 +       if (!status)
440                 return IRQ_NONE;
441  
442         /*
443 @@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int
444  
445  /*----------------------------------------------------------------------*/
446  
447 -static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
448 -{
449 -       struct dw_desc          *desc = txd_to_dw_desc(tx);
450 -       struct dw_dma_chan      *dwc = to_dw_dma_chan(tx->chan);
451 -       dma_cookie_t            cookie;
452 -       unsigned long           flags;
453 -
454 -       spin_lock_irqsave(&dwc->lock, flags);
455 -       cookie = dma_cookie_assign(tx);
456 -
457 -       /*
458 -        * REVISIT: We should attempt to chain as many descriptors as
459 -        * possible, perhaps even appending to those already submitted
460 -        * for DMA. But this is hard to do in a race-free manner.
461 -        */
462 -
463 -       dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
464 -       list_add_tail(&desc->desc_node, &dwc->queue);
465 -
466 -       spin_unlock_irqrestore(&dwc->lock, flags);
467 -
468 -       return cookie;
469 -}
470 -
471  static struct dma_async_tx_descriptor *
472  dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
473                 size_t len, unsigned long flags)
474 @@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
475         struct dw_desc          *prev;
476         size_t                  xfer_count;
477         size_t                  offset;
478 +       u8                      m_master = dwc->m_master;
479         unsigned int            src_width;
480         unsigned int            dst_width;
481 -       unsigned int            data_width;
482 +       unsigned int            data_width = dw->pdata->data_width[m_master];
483         u32                     ctllo;
484 +       u8                      lms = DWC_LLP_LMS(m_master);
485  
486         dev_vdbg(chan2dev(chan),
487                         "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
488 @@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
489  
490         dwc->direction = DMA_MEM_TO_MEM;
491  
492 -       data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
493 -                          dw->data_width[dwc->dst_master]);
494 -
495 -       src_width = dst_width = min_t(unsigned int, data_width,
496 -                                     dwc_fast_ffs(src | dest | len));
497 +       src_width = dst_width = __ffs(data_width | src | dest | len);
498  
499         ctllo = DWC_DEFAULT_CTLLO(chan)
500                         | DWC_CTLL_DST_WIDTH(dst_width)
501 @@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
502                 if (!desc)
503                         goto err_desc_get;
504  
505 -               desc->lli.sar = src + offset;
506 -               desc->lli.dar = dest + offset;
507 -               desc->lli.ctllo = ctllo;
508 -               desc->lli.ctlhi = xfer_count;
509 +               lli_write(desc, sar, src + offset);
510 +               lli_write(desc, dar, dest + offset);
511 +               lli_write(desc, ctllo, ctllo);
512 +               lli_write(desc, ctlhi, xfer_count);
513                 desc->len = xfer_count << src_width;
514  
515                 if (!first) {
516                         first = desc;
517                 } else {
518 -                       prev->lli.llp = desc->txd.phys;
519 -                       list_add_tail(&desc->desc_node,
520 -                                       &first->tx_list);
521 +                       lli_write(prev, llp, desc->txd.phys | lms);
522 +                       list_add_tail(&desc->desc_node, &first->tx_list);
523                 }
524                 prev = desc;
525         }
526  
527         if (flags & DMA_PREP_INTERRUPT)
528                 /* Trigger interrupt after last block */
529 -               prev->lli.ctllo |= DWC_CTLL_INT_EN;
530 +               lli_set(prev, ctllo, DWC_CTLL_INT_EN);
531  
532         prev->lli.llp = 0;
533 +       lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
534         first->txd.flags = flags;
535         first->total_len = len;
536  
537 @@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan,
538         struct dw_desc          *prev;
539         struct dw_desc          *first;
540         u32                     ctllo;
541 +       u8                      m_master = dwc->m_master;
542 +       u8                      lms = DWC_LLP_LMS(m_master);
543         dma_addr_t              reg;
544         unsigned int            reg_width;
545         unsigned int            mem_width;
546 -       unsigned int            data_width;
547 +       unsigned int            data_width = dw->pdata->data_width[m_master];
548         unsigned int            i;
549         struct scatterlist      *sg;
550         size_t                  total_len = 0;
551 @@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan,
552                 ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
553                         DWC_CTLL_FC(DW_DMA_FC_D_M2P);
554  
555 -               data_width = dw->data_width[dwc->src_master];
556 -
557                 for_each_sg(sgl, sg, sg_len, i) {
558                         struct dw_desc  *desc;
559                         u32             len, dlen, mem;
560 @@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan,
561                         mem = sg_dma_address(sg);
562                         len = sg_dma_len(sg);
563  
564 -                       mem_width = min_t(unsigned int,
565 -                                         data_width, dwc_fast_ffs(mem | len));
566 +                       mem_width = __ffs(data_width | mem | len);
567  
568  slave_sg_todev_fill_desc:
569                         desc = dwc_desc_get(dwc);
570                         if (!desc)
571                                 goto err_desc_get;
572  
573 -                       desc->lli.sar = mem;
574 -                       desc->lli.dar = reg;
575 -                       desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
576 +                       lli_write(desc, sar, mem);
577 +                       lli_write(desc, dar, reg);
578 +                       lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
579                         if ((len >> mem_width) > dwc->block_size) {
580                                 dlen = dwc->block_size << mem_width;
581                                 mem += dlen;
582 @@ -826,15 +797,14 @@ slave_sg_todev_fill_desc:
583                                 len = 0;
584                         }
585  
586 -                       desc->lli.ctlhi = dlen >> mem_width;
587 +                       lli_write(desc, ctlhi, dlen >> mem_width);
588                         desc->len = dlen;
589  
590                         if (!first) {
591                                 first = desc;
592                         } else {
593 -                               prev->lli.llp = desc->txd.phys;
594 -                               list_add_tail(&desc->desc_node,
595 -                                               &first->tx_list);
596 +                               lli_write(prev, llp, desc->txd.phys | lms);
597 +                               list_add_tail(&desc->desc_node, &first->tx_list);
598                         }
599                         prev = desc;
600                         total_len += dlen;
601 @@ -854,8 +824,6 @@ slave_sg_todev_fill_desc:
602                 ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
603                         DWC_CTLL_FC(DW_DMA_FC_D_P2M);
604  
605 -               data_width = dw->data_width[dwc->dst_master];
606 -
607                 for_each_sg(sgl, sg, sg_len, i) {
608                         struct dw_desc  *desc;
609                         u32             len, dlen, mem;
610 @@ -863,17 +831,16 @@ slave_sg_todev_fill_desc:
611                         mem = sg_dma_address(sg);
612                         len = sg_dma_len(sg);
613  
614 -                       mem_width = min_t(unsigned int,
615 -                                         data_width, dwc_fast_ffs(mem | len));
616 +                       mem_width = __ffs(data_width | mem | len);
617  
618  slave_sg_fromdev_fill_desc:
619                         desc = dwc_desc_get(dwc);
620                         if (!desc)
621                                 goto err_desc_get;
622  
623 -                       desc->lli.sar = reg;
624 -                       desc->lli.dar = mem;
625 -                       desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
626 +                       lli_write(desc, sar, reg);
627 +                       lli_write(desc, dar, mem);
628 +                       lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
629                         if ((len >> reg_width) > dwc->block_size) {
630                                 dlen = dwc->block_size << reg_width;
631                                 mem += dlen;
632 @@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc:
633                                 dlen = len;
634                                 len = 0;
635                         }
636 -                       desc->lli.ctlhi = dlen >> reg_width;
637 +                       lli_write(desc, ctlhi, dlen >> reg_width);
638                         desc->len = dlen;
639  
640                         if (!first) {
641                                 first = desc;
642                         } else {
643 -                               prev->lli.llp = desc->txd.phys;
644 -                               list_add_tail(&desc->desc_node,
645 -                                               &first->tx_list);
646 +                               lli_write(prev, llp, desc->txd.phys | lms);
647 +                               list_add_tail(&desc->desc_node, &first->tx_list);
648                         }
649                         prev = desc;
650                         total_len += dlen;
651 @@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc:
652  
653         if (flags & DMA_PREP_INTERRUPT)
654                 /* Trigger interrupt after last block */
655 -               prev->lli.ctllo |= DWC_CTLL_INT_EN;
656 +               lli_set(prev, ctllo, DWC_CTLL_INT_EN);
657  
658         prev->lli.llp = 0;
659 +       lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
660         first->total_len = total_len;
661  
662         return &first->txd;
663 @@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan
664         dwc->src_id = dws->src_id;
665         dwc->dst_id = dws->dst_id;
666  
667 -       dwc->src_master = dws->src_master;
668 -       dwc->dst_master = dws->dst_master;
669 +       dwc->m_master = dws->m_master;
670 +       dwc->p_master = dws->p_master;
671  
672         return true;
673  }
674 @@ -986,7 +953,7 @@ static int dwc_pause(struct dma_chan *ch
675         while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
676                 udelay(2);
677  
678 -       dwc->paused = true;
679 +       set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
680  
681         spin_unlock_irqrestore(&dwc->lock, flags);
682  
683 @@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc
684  
685         channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
686  
687 -       dwc->paused = false;
688 +       clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
689  }
690  
691  static int dwc_resume(struct dma_chan *chan)
692 @@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c
693         struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
694         unsigned long           flags;
695  
696 -       if (!dwc->paused)
697 -               return 0;
698 -
699         spin_lock_irqsave(&dwc->lock, flags);
700  
701 -       dwc_chan_resume(dwc);
702 +       if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
703 +               dwc_chan_resume(dwc);
704  
705         spin_unlock_irqrestore(&dwc->lock, flags);
706  
707 @@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_
708         return 0;
709  }
710  
711 -static inline u32 dwc_get_residue(struct dw_dma_chan *dwc)
712 +static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
713 +{
714 +       struct dw_desc *desc;
715 +
716 +       list_for_each_entry(desc, &dwc->active_list, desc_node)
717 +               if (desc->txd.cookie == c)
718 +                       return desc;
719 +
720 +       return NULL;
721 +}
722 +
723 +static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
724  {
725 +       struct dw_desc *desc;
726         unsigned long flags;
727         u32 residue;
728  
729         spin_lock_irqsave(&dwc->lock, flags);
730  
731 -       residue = dwc->residue;
732 -       if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
733 -               residue -= dwc_get_sent(dwc);
734 +       desc = dwc_find_desc(dwc, cookie);
735 +       if (desc) {
736 +               if (desc == dwc_first_active(dwc)) {
737 +                       residue = desc->residue;
738 +                       if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
739 +                               residue -= dwc_get_sent(dwc);
740 +               } else {
741 +                       residue = desc->total_len;
742 +               }
743 +       } else {
744 +               residue = 0;
745 +       }
746  
747         spin_unlock_irqrestore(&dwc->lock, flags);
748         return residue;
749 @@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan,
750         dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
751  
752         ret = dma_cookie_status(chan, cookie, txstate);
753 -       if (ret != DMA_COMPLETE)
754 -               dma_set_residue(txstate, dwc_get_residue(dwc));
755 +       if (ret == DMA_COMPLETE)
756 +               return ret;
757 +
758 +       dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
759  
760 -       if (dwc->paused && ret == DMA_IN_PROGRESS)
761 +       if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
762                 return DMA_PAUSED;
763  
764         return ret;
765 @@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma
766  
767  static void dw_dma_off(struct dw_dma *dw)
768  {
769 -       int i;
770 +       unsigned int i;
771  
772         dma_writel(dw, CFG, 0);
773  
774 @@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw
775                 cpu_relax();
776  
777         for (i = 0; i < dw->dma.chancnt; i++)
778 -               dw->chan[i].initialized = false;
779 +               clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
780  }
781  
782  static void dw_dma_on(struct dw_dma *dw)
783 @@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru
784  {
785         struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
786         struct dw_dma           *dw = to_dw_dma(chan->device);
787 -       struct dw_desc          *desc;
788 -       int                     i;
789 -       unsigned long           flags;
790  
791         dev_vdbg(chan2dev(chan), "%s\n", __func__);
792  
793 @@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru
794                 dw_dma_on(dw);
795         dw->in_use |= dwc->mask;
796  
797 -       spin_lock_irqsave(&dwc->lock, flags);
798 -       i = dwc->descs_allocated;
799 -       while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
800 -               dma_addr_t phys;
801 -
802 -               spin_unlock_irqrestore(&dwc->lock, flags);
803 -
804 -               desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys);
805 -               if (!desc)
806 -                       goto err_desc_alloc;
807 -
808 -               memset(desc, 0, sizeof(struct dw_desc));
809 -
810 -               INIT_LIST_HEAD(&desc->tx_list);
811 -               dma_async_tx_descriptor_init(&desc->txd, chan);
812 -               desc->txd.tx_submit = dwc_tx_submit;
813 -               desc->txd.flags = DMA_CTRL_ACK;
814 -               desc->txd.phys = phys;
815 -
816 -               dwc_desc_put(dwc, desc);
817 -
818 -               spin_lock_irqsave(&dwc->lock, flags);
819 -               i = ++dwc->descs_allocated;
820 -       }
821 -
822 -       spin_unlock_irqrestore(&dwc->lock, flags);
823 -
824 -       dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
825 -
826 -       return i;
827 -
828 -err_desc_alloc:
829 -       dev_info(chan2dev(chan), "only allocated %d descriptors\n", i);
830 -
831 -       return i;
832 +       return 0;
833  }
834  
835  static void dwc_free_chan_resources(struct dma_chan *chan)
836  {
837         struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
838         struct dw_dma           *dw = to_dw_dma(chan->device);
839 -       struct dw_desc          *desc, *_desc;
840         unsigned long           flags;
841         LIST_HEAD(list);
842  
843 @@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru
844         BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
845  
846         spin_lock_irqsave(&dwc->lock, flags);
847 -       list_splice_init(&dwc->free_list, &list);
848 -       dwc->descs_allocated = 0;
849  
850         /* Clear custom channel configuration */
851         dwc->src_id = 0;
852         dwc->dst_id = 0;
853  
854 -       dwc->src_master = 0;
855 -       dwc->dst_master = 0;
856 +       dwc->m_master = 0;
857 +       dwc->p_master = 0;
858  
859 -       dwc->initialized = false;
860 +       clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
861  
862         /* Disable interrupts */
863         channel_clear_bit(dw, MASK.XFER, dwc->mask);
864 @@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru
865         if (!dw->in_use)
866                 dw_dma_off(dw);
867  
868 -       list_for_each_entry_safe(desc, _desc, &list, desc_node) {
869 -               dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
870 -               dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
871 -       }
872 -
873         dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
874  }
875  
876 @@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
877         struct dw_cyclic_desc           *retval = NULL;
878         struct dw_desc                  *desc;
879         struct dw_desc                  *last = NULL;
880 +       u8                              lms = DWC_LLP_LMS(dwc->m_master);
881         unsigned long                   was_cyclic;
882         unsigned int                    reg_width;
883         unsigned int                    periods;
884 @@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
885  
886         retval = ERR_PTR(-ENOMEM);
887  
888 -       if (periods > NR_DESCS_PER_CHANNEL)
889 -               goto out_err;
890 -
891         cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
892         if (!cdesc)
893                 goto out_err;
894 @@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
895  
896                 switch (direction) {
897                 case DMA_MEM_TO_DEV:
898 -                       desc->lli.dar = sconfig->dst_addr;
899 -                       desc->lli.sar = buf_addr + (period_len * i);
900 -                       desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
901 -                                       | DWC_CTLL_DST_WIDTH(reg_width)
902 -                                       | DWC_CTLL_SRC_WIDTH(reg_width)
903 -                                       | DWC_CTLL_DST_FIX
904 -                                       | DWC_CTLL_SRC_INC
905 -                                       | DWC_CTLL_INT_EN);
906 -
907 -                       desc->lli.ctllo |= sconfig->device_fc ?
908 -                               DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
909 -                               DWC_CTLL_FC(DW_DMA_FC_D_M2P);
910 +                       lli_write(desc, dar, sconfig->dst_addr);
911 +                       lli_write(desc, sar, buf_addr + period_len * i);
912 +                       lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
913 +                               | DWC_CTLL_DST_WIDTH(reg_width)
914 +                               | DWC_CTLL_SRC_WIDTH(reg_width)
915 +                               | DWC_CTLL_DST_FIX
916 +                               | DWC_CTLL_SRC_INC
917 +                               | DWC_CTLL_INT_EN));
918 +
919 +                       lli_set(desc, ctllo, sconfig->device_fc ?
920 +                                       DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
921 +                                       DWC_CTLL_FC(DW_DMA_FC_D_M2P));
922  
923                         break;
924                 case DMA_DEV_TO_MEM:
925 -                       desc->lli.dar = buf_addr + (period_len * i);
926 -                       desc->lli.sar = sconfig->src_addr;
927 -                       desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
928 -                                       | DWC_CTLL_SRC_WIDTH(reg_width)
929 -                                       | DWC_CTLL_DST_WIDTH(reg_width)
930 -                                       | DWC_CTLL_DST_INC
931 -                                       | DWC_CTLL_SRC_FIX
932 -                                       | DWC_CTLL_INT_EN);
933 -
934 -                       desc->lli.ctllo |= sconfig->device_fc ?
935 -                               DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
936 -                               DWC_CTLL_FC(DW_DMA_FC_D_P2M);
937 +                       lli_write(desc, dar, buf_addr + period_len * i);
938 +                       lli_write(desc, sar, sconfig->src_addr);
939 +                       lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
940 +                               | DWC_CTLL_SRC_WIDTH(reg_width)
941 +                               | DWC_CTLL_DST_WIDTH(reg_width)
942 +                               | DWC_CTLL_DST_INC
943 +                               | DWC_CTLL_SRC_FIX
944 +                               | DWC_CTLL_INT_EN));
945 +
946 +                       lli_set(desc, ctllo, sconfig->device_fc ?
947 +                                       DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
948 +                                       DWC_CTLL_FC(DW_DMA_FC_D_P2M));
949  
950                         break;
951                 default:
952                         break;
953                 }
954  
955 -               desc->lli.ctlhi = (period_len >> reg_width);
956 +               lli_write(desc, ctlhi, period_len >> reg_width);
957                 cdesc->desc[i] = desc;
958  
959                 if (last)
960 -                       last->lli.llp = desc->txd.phys;
961 +                       lli_write(last, llp, desc->txd.phys | lms);
962  
963                 last = desc;
964         }
965  
966         /* Let's make a cyclic list */
967 -       last->lli.llp = cdesc->desc[0]->txd.phys;
968 +       lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
969  
970         dev_dbg(chan2dev(&dwc->chan),
971                         "cyclic prepared buf %pad len %zu period %zu periods %d\n",
972 @@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan
973         struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
974         struct dw_dma           *dw = to_dw_dma(dwc->chan.device);
975         struct dw_cyclic_desc   *cdesc = dwc->cdesc;
976 -       int                     i;
977 +       unsigned int            i;
978         unsigned long           flags;
979  
980         dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
981 @@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan
982         kfree(cdesc->desc);
983         kfree(cdesc);
984  
985 +       dwc->cdesc = NULL;
986 +
987         clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
988  }
989  EXPORT_SYMBOL(dw_dma_cyclic_free);
990  
991  /*----------------------------------------------------------------------*/
992  
993 -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
994 +int dw_dma_probe(struct dw_dma_chip *chip)
995  {
996 +       struct dw_dma_platform_data *pdata;
997         struct dw_dma           *dw;
998         bool                    autocfg = false;
999         unsigned int            dw_params;
1000 -       unsigned int            max_blk_size = 0;
1001 +       unsigned int            i;
1002         int                     err;
1003 -       int                     i;
1004  
1005         dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
1006         if (!dw)
1007                 return -ENOMEM;
1008  
1009 +       dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
1010 +       if (!dw->pdata)
1011 +               return -ENOMEM;
1012 +
1013         dw->regs = chip->regs;
1014         chip->dw = dw;
1015  
1016         pm_runtime_get_sync(chip->dev);
1017  
1018 -       if (!pdata) {
1019 -               dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
1020 +       if (!chip->pdata) {
1021 +               dw_params = dma_readl(dw, DW_PARAMS);
1022                 dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
1023  
1024                 autocfg = dw_params >> DW_PARAMS_EN & 1;
1025 @@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi
1026                         goto err_pdata;
1027                 }
1028  
1029 -               pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
1030 -               if (!pdata) {
1031 -                       err = -ENOMEM;
1032 -                       goto err_pdata;
1033 -               }
1034 +               /* Reassign the platform data pointer */
1035 +               pdata = dw->pdata;
1036  
1037                 /* Get hardware configuration parameters */
1038                 pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
1039                 pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
1040                 for (i = 0; i < pdata->nr_masters; i++) {
1041                         pdata->data_width[i] =
1042 -                               (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
1043 +                               4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
1044                 }
1045 -               max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
1046 +               pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
1047  
1048                 /* Fill platform data with the default values */
1049                 pdata->is_private = true;
1050                 pdata->is_memcpy = true;
1051                 pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
1052                 pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
1053 -       } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
1054 +       } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
1055                 err = -EINVAL;
1056                 goto err_pdata;
1057 +       } else {
1058 +               memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
1059 +
1060 +               /* Reassign the platform data pointer */
1061 +               pdata = dw->pdata;
1062         }
1063  
1064         dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
1065 @@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
1066                 goto err_pdata;
1067         }
1068  
1069 -       /* Get hardware configuration parameters */
1070 -       dw->nr_masters = pdata->nr_masters;
1071 -       for (i = 0; i < dw->nr_masters; i++)
1072 -               dw->data_width[i] = pdata->data_width[i];
1073 -
1074         /* Calculate all channel mask before DMA setup */
1075         dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
1076  
1077 @@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
1078  
1079                 INIT_LIST_HEAD(&dwc->active_list);
1080                 INIT_LIST_HEAD(&dwc->queue);
1081 -               INIT_LIST_HEAD(&dwc->free_list);
1082  
1083                 channel_clear_bit(dw, CH_EN, dwc->mask);
1084  
1085 @@ -1615,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chi
1086  
1087                 /* Hardware configuration */
1088                 if (autocfg) {
1089 -                       unsigned int dwc_params;
1090                         unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
1091 -                       void __iomem *addr = chip->regs + r * sizeof(u32);
1092 -
1093 -                       dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
1094 +                       void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
1095 +                       unsigned int dwc_params = dma_readl_native(addr);
1096  
1097                         dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
1098                                            dwc_params);
1099 @@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi
1100                          * up to 0x0a for 4095.
1101                          */
1102                         dwc->block_size =
1103 -                               (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
1104 +                               (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
1105                         dwc->nollp =
1106                                 (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
1107                 } else {
1108                         dwc->block_size = pdata->block_size;
1109  
1110                         /* Check if channel supports multi block transfer */
1111 -                       channel_writel(dwc, LLP, 0xfffffffc);
1112 -                       dwc->nollp =
1113 -                               (channel_readl(dwc, LLP) & 0xfffffffc) == 0;
1114 +                       channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
1115 +                       dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
1116                         channel_writel(dwc, LLP, 0);
1117                 }
1118         }
1119 --- a/drivers/dma/dw/pci.c
1120 +++ b/drivers/dma/dw/pci.c
1121 @@ -17,8 +17,8 @@
1122  
1123  static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
1124  {
1125 +       const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
1126         struct dw_dma_chip *chip;
1127 -       struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
1128         int ret;
1129  
1130         ret = pcim_enable_device(pdev);
1131 @@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *
1132         chip->dev = &pdev->dev;
1133         chip->regs = pcim_iomap_table(pdev)[0];
1134         chip->irq = pdev->irq;
1135 +       chip->pdata = pdata;
1136  
1137 -       ret = dw_dma_probe(chip, pdata);
1138 +       ret = dw_dma_probe(chip);
1139         if (ret)
1140                 return ret;
1141  
1142 @@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci
1143  
1144         /* Haswell */
1145         { PCI_VDEVICE(INTEL, 0x9c60) },
1146 +
1147 +       /* Broadwell */
1148 +       { PCI_VDEVICE(INTEL, 0x9ce0) },
1149 +
1150         { }
1151  };
1152  MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
1153 --- a/drivers/dma/dw/platform.c
1154 +++ b/drivers/dma/dw/platform.c
1155 @@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(
1156  
1157         slave.src_id = dma_spec->args[0];
1158         slave.dst_id = dma_spec->args[0];
1159 -       slave.src_master = dma_spec->args[1];
1160 -       slave.dst_master = dma_spec->args[2];
1161 +       slave.m_master = dma_spec->args[1];
1162 +       slave.p_master = dma_spec->args[2];
1163  
1164         if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
1165                     slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
1166 -                   slave.src_master >= dw->nr_masters ||
1167 -                   slave.dst_master >= dw->nr_masters))
1168 +                   slave.m_master >= dw->pdata->nr_masters ||
1169 +                   slave.p_master >= dw->pdata->nr_masters))
1170                 return NULL;
1171  
1172         dma_cap_zero(cap);
1173 @@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm
1174                 .dma_dev = dma_spec->dev,
1175                 .src_id = dma_spec->slave_id,
1176                 .dst_id = dma_spec->slave_id,
1177 -               .src_master = 1,
1178 -               .dst_master = 0,
1179 +               .m_master = 0,
1180 +               .p_master = 1,
1181         };
1182  
1183         return dw_dma_filter(chan, &slave);
1184 @@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device *
1185         struct device_node *np = pdev->dev.of_node;
1186         struct dw_dma_platform_data *pdata;
1187         u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
1188 +       u32 nr_masters;
1189 +       u32 nr_channels;
1190  
1191         if (!np) {
1192                 dev_err(&pdev->dev, "Missing DT data\n");
1193                 return NULL;
1194         }
1195  
1196 +       if (of_property_read_u32(np, "dma-masters", &nr_masters))
1197 +               return NULL;
1198 +       if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
1199 +               return NULL;
1200 +
1201 +       if (of_property_read_u32(np, "dma-channels", &nr_channels))
1202 +               return NULL;
1203 +
1204         pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
1205         if (!pdata)
1206                 return NULL;
1207  
1208 -       if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
1209 -               return NULL;
1210 +       pdata->nr_masters = nr_masters;
1211 +       pdata->nr_channels = nr_channels;
1212  
1213         if (of_property_read_bool(np, "is_private"))
1214                 pdata->is_private = true;
1215 @@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *
1216         if (!of_property_read_u32(np, "block_size", &tmp))
1217                 pdata->block_size = tmp;
1218  
1219 -       if (!of_property_read_u32(np, "dma-masters", &tmp)) {
1220 -               if (tmp > DW_DMA_MAX_NR_MASTERS)
1221 -                       return NULL;
1222 -
1223 -               pdata->nr_masters = tmp;
1224 -       }
1225 -
1226 -       if (!of_property_read_u32_array(np, "data_width", arr,
1227 -                               pdata->nr_masters))
1228 -               for (tmp = 0; tmp < pdata->nr_masters; tmp++)
1229 +       if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
1230 +               for (tmp = 0; tmp < nr_masters; tmp++)
1231                         pdata->data_width[tmp] = arr[tmp];
1232 +       } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
1233 +               for (tmp = 0; tmp < nr_masters; tmp++)
1234 +                       pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
1235 +       }
1236  
1237         return pdata;
1238  }
1239 @@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi
1240         struct dw_dma_chip *chip;
1241         struct device *dev = &pdev->dev;
1242         struct resource *mem;
1243 -       const struct acpi_device_id *id;
1244 -       struct dw_dma_platform_data *pdata;
1245 +       const struct dw_dma_platform_data *pdata;
1246         int err;
1247  
1248         chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
1249 @@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi
1250         pdata = dev_get_platdata(dev);
1251         if (!pdata)
1252                 pdata = dw_dma_parse_dt(pdev);
1253 -       if (!pdata && has_acpi_companion(dev)) {
1254 -               id = acpi_match_device(dev->driver->acpi_match_table, dev);
1255 -               if (id)
1256 -                       pdata = (struct dw_dma_platform_data *)id->driver_data;
1257 -       }
1258  
1259         chip->dev = dev;
1260 +       chip->pdata = pdata;
1261  
1262         chip->clk = devm_clk_get(chip->dev, "hclk");
1263         if (IS_ERR(chip->clk))
1264 @@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi
1265  
1266         pm_runtime_enable(&pdev->dev);
1267  
1268 -       err = dw_dma_probe(chip, pdata);
1269 +       err = dw_dma_probe(chip);
1270         if (err)
1271                 goto err_dw_dma_probe;
1272  
1273 @@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_
1274  {
1275         struct dw_dma_chip *chip = platform_get_drvdata(pdev);
1276  
1277 +       /*
1278 +        * We have to call dw_dma_disable() to stop any ongoing transfer. On
1279 +        * some platforms we can't do that since DMA device is powered off.
1280 +        * Moreover we have no possibility to check if the platform is affected
1281 +        * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
1282 +        * unconditionally. On the other hand we can't use
1283 +        * pm_runtime_suspended() because runtime PM framework is not fully
1284 +        * used by the driver.
1285 +        */
1286 +       pm_runtime_get_sync(chip->dev);
1287         dw_dma_disable(chip);
1288 +       pm_runtime_put_sync_suspend(chip->dev);
1289 +
1290         clk_disable_unprepare(chip->clk);
1291  }
1292  
1293 @@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab
1294  #endif
1295  
1296  #ifdef CONFIG_ACPI
1297 -static struct dw_dma_platform_data dw_dma_acpi_pdata = {
1298 -       .nr_channels = 8,
1299 -       .is_private = true,
1300 -       .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
1301 -       .chan_priority = CHAN_PRIORITY_ASCENDING,
1302 -       .block_size = 4095,
1303 -       .nr_masters = 2,
1304 -};
1305 -
1306  static const struct acpi_device_id dw_dma_acpi_id_table[] = {
1307 -       { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata },
1308 +       { "INTL9C60", 0 },
1309         { }
1310  };
1311  MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
1312 --- a/drivers/dma/dw/regs.h
1313 +++ b/drivers/dma/dw/regs.h
1314 @@ -114,10 +114,6 @@ struct dw_dma_regs {
1315  #define dma_writel_native writel
1316  #endif
1317  
1318 -/* To access the registers in early stage of probe */
1319 -#define dma_read_byaddr(addr, name) \
1320 -       dma_readl_native((addr) + offsetof(struct dw_dma_regs, name))
1321 -
1322  /* Bitfields in DW_PARAMS */
1323  #define DW_PARAMS_NR_CHAN      8               /* number of channels */
1324  #define DW_PARAMS_NR_MASTER    11              /* number of AHB masters */
1325 @@ -143,6 +139,10 @@ enum dw_dma_msize {
1326         DW_DMA_MSIZE_256,
1327  };
1328  
1329 +/* Bitfields in LLP */
1330 +#define DWC_LLP_LMS(x)         ((x) & 3)       /* list master select */
1331 +#define DWC_LLP_LOC(x)         ((x) & ~3)      /* next lli */
1332 +
1333  /* Bitfields in CTL_LO */
1334  #define DWC_CTLL_INT_EN                (1 << 0)        /* irqs enabled? */
1335  #define DWC_CTLL_DST_WIDTH(n)  ((n)<<1)        /* bytes per element */
1336 @@ -150,7 +150,7 @@ enum dw_dma_msize {
1337  #define DWC_CTLL_DST_INC       (0<<7)          /* DAR update/not */
1338  #define DWC_CTLL_DST_DEC       (1<<7)
1339  #define DWC_CTLL_DST_FIX       (2<<7)
1340 -#define DWC_CTLL_SRC_INC       (0<<7)          /* SAR update/not */
1341 +#define DWC_CTLL_SRC_INC       (0<<9)          /* SAR update/not */
1342  #define DWC_CTLL_SRC_DEC       (1<<9)
1343  #define DWC_CTLL_SRC_FIX       (2<<9)
1344  #define DWC_CTLL_DST_MSIZE(n)  ((n)<<11)       /* burst, #elements */
1345 @@ -216,6 +216,8 @@ enum dw_dma_msize {
1346  enum dw_dmac_flags {
1347         DW_DMA_IS_CYCLIC = 0,
1348         DW_DMA_IS_SOFT_LLP = 1,
1349 +       DW_DMA_IS_PAUSED = 2,
1350 +       DW_DMA_IS_INITIALIZED = 3,
1351  };
1352  
1353  struct dw_dma_chan {
1354 @@ -224,8 +226,6 @@ struct dw_dma_chan {
1355         u8                              mask;
1356         u8                              priority;
1357         enum dma_transfer_direction     direction;
1358 -       bool                            paused;
1359 -       bool                            initialized;
1360  
1361         /* software emulation of the LLP transfers */
1362         struct list_head        *tx_node_active;
1363 @@ -236,8 +236,6 @@ struct dw_dma_chan {
1364         unsigned long           flags;
1365         struct list_head        active_list;
1366         struct list_head        queue;
1367 -       struct list_head        free_list;
1368 -       u32                     residue;
1369         struct dw_cyclic_desc   *cdesc;
1370  
1371         unsigned int            descs_allocated;
1372 @@ -249,8 +247,8 @@ struct dw_dma_chan {
1373         /* custom slave configuration */
1374         u8                      src_id;
1375         u8                      dst_id;
1376 -       u8                      src_master;
1377 -       u8                      dst_master;
1378 +       u8                      m_master;
1379 +       u8                      p_master;
1380  
1381         /* configuration passed via .device_config */
1382         struct dma_slave_config dma_sconfig;
1383 @@ -283,9 +281,8 @@ struct dw_dma {
1384         u8                      all_chan_mask;
1385         u8                      in_use;
1386  
1387 -       /* hardware configuration */
1388 -       unsigned char           nr_masters;
1389 -       unsigned char           data_width[DW_DMA_MAX_NR_MASTERS];
1390 +       /* platform data */
1391 +       struct dw_dma_platform_data     *pdata;
1392  };
1393  
1394  static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
1395 @@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s
1396         return container_of(ddev, struct dw_dma, dma);
1397  }
1398  
1399 +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
1400 +typedef __be32 __dw32;
1401 +#else
1402 +typedef __le32 __dw32;
1403 +#endif
1404 +
1405  /* LLI == Linked List Item; a.k.a. DMA block descriptor */
1406  struct dw_lli {
1407         /* values that are not changed by hardware */
1408 -       u32             sar;
1409 -       u32             dar;
1410 -       u32             llp;            /* chain to next lli */
1411 -       u32             ctllo;
1412 +       __dw32          sar;
1413 +       __dw32          dar;
1414 +       __dw32          llp;            /* chain to next lli */
1415 +       __dw32          ctllo;
1416         /* values that may get written back: */
1417 -       u32             ctlhi;
1418 +       __dw32          ctlhi;
1419         /* sstat and dstat can snapshot peripheral register state.
1420          * silicon config may discard either or both...
1421          */
1422 -       u32             sstat;
1423 -       u32             dstat;
1424 +       __dw32          sstat;
1425 +       __dw32          dstat;
1426  };
1427  
1428  struct dw_desc {
1429         /* FIRST values the hardware uses */
1430         struct dw_lli                   lli;
1431  
1432 +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
1433 +#define lli_set(d, reg, v)             ((d)->lli.reg |= cpu_to_be32(v))
1434 +#define lli_clear(d, reg, v)           ((d)->lli.reg &= ~cpu_to_be32(v))
1435 +#define lli_read(d, reg)               be32_to_cpu((d)->lli.reg)
1436 +#define lli_write(d, reg, v)           ((d)->lli.reg = cpu_to_be32(v))
1437 +#else
1438 +#define lli_set(d, reg, v)             ((d)->lli.reg |= cpu_to_le32(v))
1439 +#define lli_clear(d, reg, v)           ((d)->lli.reg &= ~cpu_to_le32(v))
1440 +#define lli_read(d, reg)               le32_to_cpu((d)->lli.reg)
1441 +#define lli_write(d, reg, v)           ((d)->lli.reg = cpu_to_le32(v))
1442 +#endif
1443 +
1444         /* THEN values for driver housekeeping */
1445         struct list_head                desc_node;
1446         struct list_head                tx_list;
1447         struct dma_async_tx_descriptor  txd;
1448         size_t                          len;
1449         size_t                          total_len;
1450 +       u32                             residue;
1451  };
1452  
1453  #define to_dw_desc(h)  list_entry(h, struct dw_desc, desc_node)
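The regs.h changes store the in-memory LLI in a fixed byte order chosen at
build time (CONFIG_DW_DMAC_BIG_ENDIAN_IO) and route every CPU access through
the lli_read()/lli_write() helpers, so the descriptor always matches what the
controller fetches. A minimal stand-alone illustration of that accessor
pattern (plain C, hardware order hard-coded to little endian; all names here
are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct lli {
	uint32_t sar, dar, llp, ctllo;	/* stored little endian */
};

/* cpu_to_le32()/le32_to_cpu() stand-in; byte swapping is its own inverse */
static uint32_t to_hw(uint32_t v)
{
	const union { uint16_t u; uint8_t b[2]; } probe = { .u = 1 };

	return probe.b[0] ? v : __builtin_bswap32(v);
}

#define lli_write(d, reg, v)	((d)->reg = to_hw(v))
#define lli_read(d, reg)	to_hw((d)->reg)

int main(void)
{
	struct lli desc = { 0 };

	lli_write(&desc, llp, 0x1000 | 1);	/* next block via master 1 */
	printf("LMS=%u LOC=0x%x\n",
	       (unsigned)(lli_read(&desc, llp) & 3),	/* cf. DWC_LLP_LMS() */
	       (unsigned)(lli_read(&desc, llp) & ~3u));	/* cf. DWC_LLP_LOC() */
	return 0;
}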
1454 --- a/include/linux/dma/dw.h
1455 +++ b/include/linux/dma/dw.h
1456 @@ -27,6 +27,7 @@ struct dw_dma;
1457   * @regs:              memory mapped I/O space
1458   * @clk:               hclk clock
1459   * @dw:                        struct dw_dma that is filed by dw_dma_probe()
1460 + * @pdata:             pointer to platform data
1461   */
1462  struct dw_dma_chip {
1463         struct device   *dev;
1464 @@ -34,10 +35,12 @@ struct dw_dma_chip {
1465         void __iomem    *regs;
1466         struct clk      *clk;
1467         struct dw_dma   *dw;
1468 +
1469 +       const struct dw_dma_platform_data       *pdata;
1470  };
1471  
1472  /* Export to the platform drivers */
1473 -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
1474 +int dw_dma_probe(struct dw_dma_chip *chip);
1475  int dw_dma_remove(struct dw_dma_chip *chip);
1476  
1477  /* DMA API extensions */
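With the dw.h change, glue drivers no longer pass platform data as a second
argument; they stash it in struct dw_dma_chip before the call, and a NULL
pdata lets the core fall back to hardware autoconfiguration where available.
A hypothetical fragment (my_glue_probe() is illustrative, not part of the
patch):

#include <linux/dma/dw.h>

static int my_glue_probe(struct dw_dma_chip *chip,
			 const struct dw_dma_platform_data *pdata)
{
	chip->pdata = pdata;	/* NULL: core autoconfigures from DW_PARAMS */

	return dw_dma_probe(chip);	/* was: dw_dma_probe(chip, pdata) */
}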
1478 --- a/include/linux/platform_data/dma-dw.h
1479 +++ b/include/linux/platform_data/dma-dw.h
1480 @@ -21,15 +21,15 @@
1481   * @dma_dev:   required DMA master device
1482   * @src_id:    src request line
1483   * @dst_id:    dst request line
1484 - * @src_master: src master for transfers on allocated channel.
1485 - * @dst_master: dest master for transfers on allocated channel.
1486 + * @m_master:  memory master for transfers on allocated channel
1487 + * @p_master:  peripheral master for transfers on allocated channel
1488   */
1489  struct dw_dma_slave {
1490         struct device           *dma_dev;
1491         u8                      src_id;
1492         u8                      dst_id;
1493 -       u8                      src_master;
1494 -       u8                      dst_master;
1495 +       u8                      m_master;
1496 +       u8                      p_master;
1497  };
1498  
1499  /**
1500 @@ -43,7 +43,7 @@ struct dw_dma_slave {
1501   * @block_size: Maximum block size supported by the controller
1502   * @nr_masters: Number of AHB masters supported by the controller
1503   * @data_width: Maximum data width supported by hardware per AHB master
1504 - *             (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
1505 + *             (in bytes, power of 2)
1506   */
1507  struct dw_dma_platform_data {
1508         unsigned int    nr_channels;
1509 @@ -55,7 +55,7 @@ struct dw_dma_platform_data {
1510  #define CHAN_PRIORITY_ASCENDING                0       /* chan0 highest */
1511  #define CHAN_PRIORITY_DESCENDING       1       /* chan7 highest */
1512         unsigned char   chan_priority;
1513 -       unsigned short  block_size;
1514 +       unsigned int    block_size;
1515         unsigned char   nr_masters;
1516         unsigned char   data_width[DW_DMA_MAX_NR_MASTERS];
1517  };
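Finally, the dma-dw.h rename describes the two AHB masters by role rather
than by transfer direction. A hypothetical board-file fragment using the
renamed dw_dma_slave fields (the request-line and master numbers are made
up):

#include <linux/platform_data/dma-dw.h>

static struct dw_dma_slave uart_dma_tx = {
	.src_id		= 0,	/* request lines are unchanged */
	.dst_id		= 1,
	.m_master	= 0,	/* AHB master wired towards memory */
	.p_master	= 1,	/* AHB master wired towards the peripheral */
};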