target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
1 From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
2 From: Sricharan R <sricharan@codeaurora.org>
3 Date: Mon, 28 Aug 2017 20:30:24 +0530
4 Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
5
6 The bam dmaengine has a circular FIFO to which we
7 add hw descriptors that describe the transaction.
8 The FIFO has space for about 4096 hw descriptors.
9
10 Currently we add one descriptor, wait for it to
11 complete with an interrupt, and then add the next pending
12 descriptor. In this way, the FIFO is underutilized
13 since only one descriptor is processed at a time, although
14 there is space in the FIFO for the BAM to process more.
15
16 Instead, keep adding descriptors to the FIFO until it is full;
17 that allows the BAM to continue working on the next descriptor
18 immediately after signalling the completion interrupt for the
19 previous descriptor.
20
21 Also, when the client has not set DMA_PREP_INTERRUPT for
22 a descriptor, do not configure BAM to trigger an interrupt
23 upon completion of that descriptor. This way we get an interrupt
24 only for the descriptor for which DMA_PREP_INTERRUPT was
25 requested, and there we signal completion of all the previously
26 completed descriptors. So we still do callbacks for all requested
27 descriptors; only the number of interrupts is reduced.
28
29 CURRENT:
30
31             ------      -------   ---------------
32             |DES 0|     |DESC 1|  |DESC 2 + INT |
33             ------      -------   ---------------
34                |           |            |
35                |           |            |
36 INTERRUPT:   (INT)       (INT)        (INT)
37 CALLBACK:     (CB)        (CB)         (CB)
38
39                 MTD_SPEEDTEST READ PAGE: 3560 KiB/s
40                 MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
41                 IOZONE READ: 2456 KB/s
42                 IOZONE WRITE: 1230 KB/s
43
44         bam dma interrupts (after tests): 96508
45
46 CHANGE:
47
48         ------  -------   ---------------
49         |DES 0| |DESC 1|  |DESC 2 + INT |
50         ------  -------   ---------------
51                                 |
52                                 |
53                               (INT)
54                               (CB for 0, 1, 2)
55
56                 MTD_SPEEDTEST READ PAGE: 3860 KiB/s
57                 MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
58                 IOZONE READ: 2677 KB/s
59                 IOZONE WRITE: 1308 KB/s
60
61         bam dma interrupts (after tests): 58806
62
63 Signed-off-by: Sricharan R <sricharan@codeaurora.org>
64 Reviewed-by: Andy Gross <andy.gross@linaro.org>
65 Tested-by: Abhishek Sahu <absahu@codeaurora.org>
66 Signed-off-by: Vinod Koul <vinod.koul@intel.com>
67 ---
68  drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
69  1 file changed, 109 insertions(+), 60 deletions(-)
70
71 --- a/drivers/dma/qcom/bam_dma.c
72 +++ b/drivers/dma/qcom/bam_dma.c
73 @@ -46,6 +46,7 @@
74  #include <linux/of_address.h>
75  #include <linux/of_irq.h>
76  #include <linux/of_dma.h>
77 +#include <linux/circ_buf.h>
78  #include <linux/clk.h>
79  #include <linux/dmaengine.h>
80  #include <linux/pm_runtime.h>
81 @@ -78,6 +79,8 @@ struct bam_async_desc {
82  
83         struct bam_desc_hw *curr_desc;
84  
85 +       /* list node for the desc in the bam_chan list of descriptors */
86 +       struct list_head desc_node;
87         enum dma_transfer_direction dir;
88         size_t length;
89         struct bam_desc_hw desc[0];
90 @@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
91  #define BAM_DESC_FIFO_SIZE     SZ_32K
92  #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
93  #define BAM_FIFO_SIZE  (SZ_32K - 8)
94 +#define IS_BUSY(chan)  (CIRC_SPACE(bchan->tail, bchan->head,\
95 +                        MAX_DESCRIPTORS + 1) == 0)
96  
97  struct bam_chan {
98         struct virt_dma_chan vc;
99 @@ -356,8 +361,6 @@ struct bam_chan {
100         /* configuration from device tree */
101         u32 id;
102  
103 -       struct bam_async_desc *curr_txd;        /* current running dma */
104 -
105         /* runtime configuration */
106         struct dma_slave_config slave;
107  
108 @@ -372,6 +375,8 @@ struct bam_chan {
109         unsigned int initialized;       /* is the channel hw initialized? */
110         unsigned int paused;            /* is the channel paused? */
111         unsigned int reconfigure;       /* new slave config? */
112 +       /* list of descriptors currently processed */
113 +       struct list_head desc_list;
114  
115         struct list_head node;
116  };
117 @@ -540,7 +545,7 @@ static void bam_free_chan(struct dma_cha
118  
119         vchan_free_chan_resources(to_virt_chan(chan));
120  
121 -       if (bchan->curr_txd) {
122 +       if (!list_empty(&bchan->desc_list)) {
123                 dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
124                 goto err;
125         }
126 @@ -633,8 +638,6 @@ static struct dma_async_tx_descriptor *b
127  
128         if (flags & DMA_PREP_INTERRUPT)
129                 async_desc->flags |= DESC_FLAG_EOT;
130 -       else
131 -               async_desc->flags |= DESC_FLAG_INT;
132  
133         async_desc->num_desc = num_alloc;
134         async_desc->curr_desc = async_desc->desc;
135 @@ -685,14 +688,16 @@ err_out:
136  static int bam_dma_terminate_all(struct dma_chan *chan)
137  {
138         struct bam_chan *bchan = to_bam_chan(chan);
139 +       struct bam_async_desc *async_desc, *tmp;
140         unsigned long flag;
141         LIST_HEAD(head);
142  
143         /* remove all transactions, including active transaction */
144         spin_lock_irqsave(&bchan->vc.lock, flag);
145 -       if (bchan->curr_txd) {
146 -               list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
147 -               bchan->curr_txd = NULL;
148 +       list_for_each_entry_safe(async_desc, tmp,
149 +                                &bchan->desc_list, desc_node) {
150 +               list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
151 +               list_del(&async_desc->desc_node);
152         }
153  
154         vchan_get_all_descriptors(&bchan->vc, &head);
155 @@ -764,9 +769,9 @@ static int bam_resume(struct dma_chan *c
156   */
157  static u32 process_channel_irqs(struct bam_device *bdev)
158  {
159 -       u32 i, srcs, pipe_stts;
160 +       u32 i, srcs, pipe_stts, offset, avail;
161         unsigned long flags;
162 -       struct bam_async_desc *async_desc;
163 +       struct bam_async_desc *async_desc, *tmp;
164  
165         srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
166  
167 @@ -786,27 +791,40 @@ static u32 process_channel_irqs(struct b
168                 writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
169  
170                 spin_lock_irqsave(&bchan->vc.lock, flags);
171 -               async_desc = bchan->curr_txd;
172  
173 -               if (async_desc) {
174 -                       async_desc->num_desc -= async_desc->xfer_len;
175 -                       async_desc->curr_desc += async_desc->xfer_len;
176 -                       bchan->curr_txd = NULL;
177 +               offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
178 +                                      P_SW_OFSTS_MASK;
179 +               offset /= sizeof(struct bam_desc_hw);
180 +
181 +               /* Number of bytes available to read */
182 +               avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
183 +
184 +               list_for_each_entry_safe(async_desc, tmp,
185 +                                        &bchan->desc_list, desc_node) {
186 +                       /* Not enough data to read */
187 +                       if (avail < async_desc->xfer_len)
188 +                               break;
189  
190                         /* manage FIFO */
191                         bchan->head += async_desc->xfer_len;
192                         bchan->head %= MAX_DESCRIPTORS;
193  
194 +                       async_desc->num_desc -= async_desc->xfer_len;
195 +                       async_desc->curr_desc += async_desc->xfer_len;
196 +                       avail -= async_desc->xfer_len;
197 +
198                         /*
199 -                        * if complete, process cookie.  Otherwise
200 +                        * if complete, process cookie. Otherwise
201                          * push back to front of desc_issued so that
202                          * it gets restarted by the tasklet
203                          */
204 -                       if (!async_desc->num_desc)
205 +                       if (!async_desc->num_desc) {
206                                 vchan_cookie_complete(&async_desc->vd);
207 -                       else
208 +                       } else {
209                                 list_add(&async_desc->vd.node,
210 -                                       &bchan->vc.desc_issued);
211 +                                        &bchan->vc.desc_issued);
212 +                       }
213 +                       list_del(&async_desc->desc_node);
214                 }
215  
216                 spin_unlock_irqrestore(&bchan->vc.lock, flags);
217 @@ -868,6 +886,7 @@ static enum dma_status bam_tx_status(str
218                 struct dma_tx_state *txstate)
219  {
220         struct bam_chan *bchan = to_bam_chan(chan);
221 +       struct bam_async_desc *async_desc;
222         struct virt_dma_desc *vd;
223         int ret;
224         size_t residue = 0;
225 @@ -883,11 +902,17 @@ static enum dma_status bam_tx_status(str
226  
227         spin_lock_irqsave(&bchan->vc.lock, flags);
228         vd = vchan_find_desc(&bchan->vc, cookie);
229 -       if (vd)
230 +       if (vd) {
231                 residue = container_of(vd, struct bam_async_desc, vd)->length;
232 -       else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
233 -               for (i = 0; i < bchan->curr_txd->num_desc; i++)
234 -                       residue += bchan->curr_txd->curr_desc[i].size;
235 +       } else {
236 +               list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
237 +                       if (async_desc->vd.tx.cookie != cookie)
238 +                               continue;
239 +
240 +                       for (i = 0; i < async_desc->num_desc; i++)
241 +                               residue += async_desc->curr_desc[i].size;
242 +               }
243 +       }
244  
245         spin_unlock_irqrestore(&bchan->vc.lock, flags);
246  
247 @@ -928,63 +953,86 @@ static void bam_start_dma(struct bam_cha
248  {
249         struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
250         struct bam_device *bdev = bchan->bdev;
251 -       struct bam_async_desc *async_desc;
252 +       struct bam_async_desc *async_desc = NULL;
253         struct bam_desc_hw *desc;
254         struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
255                                         sizeof(struct bam_desc_hw));
256         int ret;
257 +       unsigned int avail;
258 +       struct dmaengine_desc_callback cb;
259  
260         lockdep_assert_held(&bchan->vc.lock);
261  
262         if (!vd)
263                 return;
264  
265 -       list_del(&vd->node);
266 -
267 -       async_desc = container_of(vd, struct bam_async_desc, vd);
268 -       bchan->curr_txd = async_desc;
269 -
270         ret = pm_runtime_get_sync(bdev->dev);
271         if (ret < 0)
272                 return;
273  
274 -       /* on first use, initialize the channel hardware */
275 -       if (!bchan->initialized)
276 -               bam_chan_init_hw(bchan, async_desc->dir);
277 -
278 -       /* apply new slave config changes, if necessary */
279 -       if (bchan->reconfigure)
280 -               bam_apply_new_config(bchan, async_desc->dir);
281 +       while (vd && !IS_BUSY(bchan)) {
282 +               list_del(&vd->node);
283  
284 -       desc = bchan->curr_txd->curr_desc;
285 +               async_desc = container_of(vd, struct bam_async_desc, vd);
286  
287 -       if (async_desc->num_desc > MAX_DESCRIPTORS)
288 -               async_desc->xfer_len = MAX_DESCRIPTORS;
289 -       else
290 -               async_desc->xfer_len = async_desc->num_desc;
291 +               /* on first use, initialize the channel hardware */
292 +               if (!bchan->initialized)
293 +                       bam_chan_init_hw(bchan, async_desc->dir);
294  
295 -       /* set any special flags on the last descriptor */
296 -       if (async_desc->num_desc == async_desc->xfer_len)
297 -               desc[async_desc->xfer_len - 1].flags |=
298 -                                       cpu_to_le16(async_desc->flags);
299 -       else
300 -               desc[async_desc->xfer_len - 1].flags |=
301 -                                       cpu_to_le16(DESC_FLAG_INT);
302 +               /* apply new slave config changes, if necessary */
303 +               if (bchan->reconfigure)
304 +                       bam_apply_new_config(bchan, async_desc->dir);
305 +
306 +               desc = async_desc->curr_desc;
307 +               avail = CIRC_SPACE(bchan->tail, bchan->head,
308 +                                  MAX_DESCRIPTORS + 1);
309 +
310 +               if (async_desc->num_desc > avail)
311 +                       async_desc->xfer_len = avail;
312 +               else
313 +                       async_desc->xfer_len = async_desc->num_desc;
314 +
315 +               /* set any special flags on the last descriptor */
316 +               if (async_desc->num_desc == async_desc->xfer_len)
317 +                       desc[async_desc->xfer_len - 1].flags |=
318 +                                               cpu_to_le16(async_desc->flags);
319  
320 -       if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
321 -               u32 partial = MAX_DESCRIPTORS - bchan->tail;
322 +               vd = vchan_next_desc(&bchan->vc);
323  
324 -               memcpy(&fifo[bchan->tail], desc,
325 -                               partial * sizeof(struct bam_desc_hw));
326 -               memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
327 +               dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
328 +
329 +               /*
330 +                * An interrupt is generated at this desc, if
331 +                *  - FIFO is FULL.
332 +                *  - No more descriptors to add.
333 +                *  - If a callback completion was requested for this DESC,
334 +                *     In this case, BAM will deliver the completion callback
335 +                *     for this desc and continue processing the next desc.
336 +                */
337 +               if (((avail <= async_desc->xfer_len) || !vd ||
338 +                    dmaengine_desc_callback_valid(&cb)) &&
339 +                   !(async_desc->flags & DESC_FLAG_EOT))
340 +                       desc[async_desc->xfer_len - 1].flags |=
341 +                               cpu_to_le16(DESC_FLAG_INT);
342 +
343 +               if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
344 +                       u32 partial = MAX_DESCRIPTORS - bchan->tail;
345 +
346 +                       memcpy(&fifo[bchan->tail], desc,
347 +                              partial * sizeof(struct bam_desc_hw));
348 +                       memcpy(fifo, &desc[partial],
349 +                              (async_desc->xfer_len - partial) *
350                                 sizeof(struct bam_desc_hw));
351 -       } else {
352 -               memcpy(&fifo[bchan->tail], desc,
353 -                       async_desc->xfer_len * sizeof(struct bam_desc_hw));
354 -       }
355 +               } else {
356 +                       memcpy(&fifo[bchan->tail], desc,
357 +                              async_desc->xfer_len *
358 +                              sizeof(struct bam_desc_hw));
359 +               }
360  
361 -       bchan->tail += async_desc->xfer_len;
362 -       bchan->tail %= MAX_DESCRIPTORS;
363 +               bchan->tail += async_desc->xfer_len;
364 +               bchan->tail %= MAX_DESCRIPTORS;
365 +               list_add_tail(&async_desc->desc_node, &bchan->desc_list);
366 +       }
367  
368         /* ensure descriptor writes and dma start not reordered */
369         wmb();
370 @@ -1013,7 +1061,7 @@ static void dma_tasklet(unsigned long da
371                 bchan = &bdev->channels[i];
372                 spin_lock_irqsave(&bchan->vc.lock, flags);
373  
374 -               if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
375 +               if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
376                         bam_start_dma(bchan);
377                 spin_unlock_irqrestore(&bchan->vc.lock, flags);
378         }
379 @@ -1034,7 +1082,7 @@ static void bam_issue_pending(struct dma
380         spin_lock_irqsave(&bchan->vc.lock, flags);
381  
382         /* if work pending and idle, start a transaction */
383 -       if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
384 +       if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
385                 bam_start_dma(bchan);
386  
387         spin_unlock_irqrestore(&bchan->vc.lock, flags);
388 @@ -1138,6 +1186,7 @@ static void bam_channel_init(struct bam_
389  
390         vchan_init(&bchan->vc, &bdev->common);
391         bchan->vc.desc_free = bam_dma_free_desc;
392 +       INIT_LIST_HEAD(&bchan->desc_list);
393  }
394  
395  static const struct of_device_id bam_of_match[] = {