ath79/mikrotik: use routerbootpart partitions
[oweals/openwrt.git] / target / linux / layerscape / patches-5.4 / 701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch
1 From 0b8c6bbb0a561f15598f6701089a992bdea3963c Mon Sep 17 00:00:00 2001
2 From: Youri Querry <youri.querry_1@nxp.com>
3 Date: Mon, 4 Nov 2019 11:03:09 -0500
4 Subject: [PATCH] soc: fsl: dpio: Replace QMAN array mode by ring mode enqueue.
5
6 This change of algorithm will enable faster bulk enqueue.
7 This will greatly benefit XDP bulk enqueue.
8
9 Signed-off-by: Youri Querry <youri.querry_1@nxp.com>
10 ---
11  drivers/soc/fsl/dpio/qbman-portal.c | 420 +++++++++++++++++++++++++++---------
12  drivers/soc/fsl/dpio/qbman-portal.h |  13 ++
13  2 files changed, 335 insertions(+), 98 deletions(-)
14
15 --- a/drivers/soc/fsl/dpio/qbman-portal.c
16 +++ b/drivers/soc/fsl/dpio/qbman-portal.c
17 @@ -8,6 +8,7 @@
18  #include <asm/cacheflush.h>
19  #include <linux/io.h>
20  #include <linux/slab.h>
21 +#include <linux/spinlock.h>
22  #include <soc/fsl/dpaa2-global.h>
23  
24  #include "qbman-portal.h"
25 @@ -22,6 +23,7 @@
26  
27  /* CINH register offsets */
28  #define QBMAN_CINH_SWP_EQCR_PI      0x800
29 +#define QBMAN_CINH_SWP_EQCR_CI     0x840
30  #define QBMAN_CINH_SWP_EQAR    0x8c0
31  #define QBMAN_CINH_SWP_CR_RT        0x900
32  #define QBMAN_CINH_SWP_VDQCR_RT     0x940
33 @@ -45,6 +47,8 @@
34  #define QBMAN_CENA_SWP_CR      0x600
35  #define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((u32)(vb) >> 1))
36  #define QBMAN_CENA_SWP_VDQCR   0x780
37 +#define QBMAN_CENA_SWP_EQCR_CI 0x840
38 +#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840
39  
40  /* CENA register offsets in memory-backed mode */
41  #define QBMAN_CENA_SWP_DQRR_MEM(n)  (0x800 + ((u32)(n) << 6))
42 @@ -72,6 +76,12 @@
43  /* opaque token for static dequeues */
44  #define QMAN_SDQCR_TOKEN    0xbb
45  
46 +#define QBMAN_EQCR_DCA_IDXMASK          0x0f
47 +#define QBMAN_ENQUEUE_FLAG_DCA          (1ULL << 31)
48 +
49 +#define EQ_DESC_SIZE_WITHOUT_FD 29
50 +#define EQ_DESC_SIZE_FD_START 32
51 +
52  enum qbman_sdqcr_dct {
53         qbman_sdqcr_dct_null = 0,
54         qbman_sdqcr_dct_prio_ics,
55 @@ -224,6 +234,15 @@ static inline u32 qbman_set_swp_cfg(u8 m
56  
57  #define QMAN_RT_MODE      0x00000100
58  
59 +static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last)
60 +{
61 +       /* 'first' is included, 'last' is excluded */
62 +       if (first <= last)
63 +               return last - first;
64 +       else
65 +               return (2 * ringsize) - (first - last);
66 +}
67 +
68  /**
69   * qbman_swp_init() - Create a functional object representing the given
70   *                    QBMan portal descriptor.
71 @@ -236,6 +255,10 @@ struct qbman_swp *qbman_swp_init(const s
72  {
73         struct qbman_swp *p = kzalloc(sizeof(*p), GFP_KERNEL);
74         u32 reg;
75 +       u32 mask_size;
76 +       u32 eqcr_pi;
77  
78         if (!p)
79                 return NULL;
80 +
81 +       spin_lock_init(&p->access_spinlock);
82 @@ -264,25 +287,38 @@ struct qbman_swp *qbman_swp_init(const s
83         p->addr_cena = d->cena_bar;
84         p->addr_cinh = d->cinh_bar;
85  
86 -       if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
87 -               memset(p->addr_cena, 0, 64 * 1024);
88 +       if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
89  
90 -       reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
91 -                               0, /* Writes cacheable */
92 -                               0, /* EQCR_CI stashing threshold */
93 -                               3, /* RPM: Valid bit mode, RCR in array mode */
94 -                               2, /* DCM: Discrete consumption ack mode */
95 -                               3, /* EPM: Valid bit mode, EQCR in array mode */
96 -                               1, /* mem stashing drop enable == TRUE */
97 -                               1, /* mem stashing priority == TRUE */
98 -                               1, /* mem stashing enable == TRUE */
99 -                               1, /* dequeue stashing priority == TRUE */
100 -                               0, /* dequeue stashing enable == FALSE */
101 -                               0); /* EQCR_CI stashing priority == FALSE */
102 -       if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
103 +               reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
104 +                       0, /* Writes Non-cacheable */
105 +                       0, /* EQCR_CI stashing threshold */
106 +                       3, /* RPM: RCR in array mode */
107 +                       2, /* DCM: Discrete consumption ack */
108 +                       2, /* EPM: EQCR in ring mode */
109 +                       1, /* mem stashing drop enable */
110 +                       1, /* mem stashing priority enable */
111 +                       1, /* mem stashing enable */
112 +                       1, /* dequeue stashing priority enable */
113 +                       0, /* dequeue stashing enable */
114 +                       0); /* EQCR_CI stashing priority enable */
115 +       } else {
116 +               memset(p->addr_cena, 0, 64 * 1024);
117 +               reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
118 +                       0, /* Writes Non-cacheable */
119 +                       1, /* EQCR_CI stashing threshold */
120 +                       3, /* RPM: RCR in array mode */
121 +                       2, /* DCM: Discrete consumption ack */
122 +                       0, /* EPM: EQCR in ring mode */
123 +                       1, /* mem stashing drop enable */
124 +                       1, /* mem stashing priority enable */
125 +                       1, /* mem stashing enable */
126 +                       1, /* dequeue stashing priority enable */
127 +                       0, /* dequeue stashing enable */
128 +                       0); /* EQCR_CI stashing priority enable */
129                 reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
130                        1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
131                        1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
132 +       }
133  
134         qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg);
135         reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG);
136 @@ -304,7 +340,9 @@ struct qbman_swp *qbman_swp_init(const s
137          */
138         qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0);
139  
140 +       p->eqcr.pi_ring_size = 8;
141         if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
142 +               p->eqcr.pi_ring_size = 32;
143                 qbman_swp_enqueue_ptr =
144                         qbman_swp_enqueue_mem_back;
145                 qbman_swp_enqueue_multiple_ptr =
146 @@ -316,6 +354,15 @@ struct qbman_swp *qbman_swp_init(const s
147                 qbman_swp_release_ptr = qbman_swp_release_mem_back;
148         }
149  
150 +       for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
151 +               p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1;
152 +       eqcr_pi = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_PI);
153 +       p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask;
154 +       p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
155 +       p->eqcr.ci = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_CI)
156 +                       & p->eqcr.pi_ci_mask;
157 +       p->eqcr.available = p->eqcr.pi_ring_size;
158 +
159         return p;
160  }
161  
162 @@ -468,8 +515,9 @@ enum qb_enqueue_commands {
163         enqueue_rejects_to_fq = 2
164  };
165  
166 -#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT      2
167 -#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT     4
168 +#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT         2
169 +#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT        4
170 +#define QB_ENQUEUE_CMD_DCA_EN_SHIFT             7
171  
172  /**
173   * qbman_eq_desc_clear() - Clear the contents of a descriptor to
174 @@ -582,6 +630,7 @@ static inline void qbman_write_eqcr_am_r
175                                      QMAN_RT_MODE);
176  }
177  
178 +#define QB_RT_BIT ((u32)0x100)
179  /**
180   * qbman_swp_enqueue_direct() - Issue an enqueue command
181   * @s:  the software portal used for enqueue
182 @@ -593,35 +642,19 @@ static inline void qbman_write_eqcr_am_r
183   *
184   * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
185   */
186 -int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d,
187 -                     const struct dpaa2_fd *fd)
188 +static
189 +int qbman_swp_enqueue_direct(struct qbman_swp *s,
190 +                            const struct qbman_eq_desc *d,
191 +                            const struct dpaa2_fd *fd)
192  {
193 -       struct qbman_eq_desc_with_fd *p;
194 -       u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
195 -
196 -       if (!EQAR_SUCCESS(eqar))
197 -               return -EBUSY;
198 +       int flags = 0;
199 +       int ret = qbman_swp_enqueue_multiple_direct(s, d, fd, &flags, 1);
200  
201 -       p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
202 -       /* This is mapped as DEVICE type memory, writes are
203 -        * with address alignment:
204 -        * desc.dca address alignment = 1
205 -        * desc.seqnum address alignment = 2
206 -        * desc.orpid address alignment = 4
207 -        * desc.tgtid address alignment = 8
208 -        */
209 -       p->desc.dca = d->dca;
210 -       p->desc.seqnum = d->seqnum;
211 -       p->desc.orpid = d->orpid;
212 -       memcpy(&p->desc.tgtid, &d->tgtid, 24);
213 -       memcpy(&p->fd, fd, sizeof(*fd));
214 -
215 -       /* Set the verb byte, have to substitute in the valid-bit */
216 -       dma_wmb();
217 -       p->desc.verb = d->verb | EQAR_VB(eqar);
218 -       dccvac(p);
219 -
220 -       return 0;
221 +       if (ret >= 0)
222 +               ret = 0;
223 +       else
224 +               ret = -EBUSY;
225 +       return  ret;
226  }
227  
228  /**
229 @@ -635,35 +668,19 @@ int qbman_swp_enqueue_direct(struct qbma
230   *
231   * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
232   */
233 +static
234  int qbman_swp_enqueue_mem_back(struct qbman_swp *s,
235                                const struct qbman_eq_desc *d,
236                                const struct dpaa2_fd *fd)
237  {
238 -       struct qbman_eq_desc_with_fd *p;
239 -       u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
240 -
241 -       if (!EQAR_SUCCESS(eqar))
242 -               return -EBUSY;
243 -
244 -       p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
245 -       /* This is mapped as DEVICE type memory, writes are
246 -        * with address alignment:
247 -        * desc.dca address alignment = 1
248 -        * desc.seqnum address alignment = 2
249 -        * desc.orpid address alignment = 4
250 -        * desc.tgtid address alignment = 8
251 -        */
252 -       p->desc.dca = d->dca;
253 -       p->desc.seqnum = d->seqnum;
254 -       p->desc.orpid = d->orpid;
255 -       memcpy(&p->desc.tgtid, &d->tgtid, 24);
256 -       memcpy(&p->fd, fd, sizeof(*fd));
257 -
258 -       p->desc.verb = d->verb | EQAR_VB(eqar);
259 -       dma_wmb();
260 -       qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar));
261 +       int flags = 0;
262 +       int ret = qbman_swp_enqueue_multiple_mem_back(s, d, fd, &flags, 1);
263  
264 -       return 0;
265 +       if (ret >= 0)
266 +               ret = 0;
267 +       else
268 +               ret = -EBUSY;
269 +       return  ret;
270  }
271  
272  /**
273 @@ -672,26 +689,84 @@ int qbman_swp_enqueue_mem_back(struct qb
274   * @s:  the software portal used for enqueue
275   * @d:  the enqueue descriptor
276   * @fd: table pointer of frame descriptor table to be enqueued
277 - * @flags: table pointer of flags, not used for the moment
278 + * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
279   * @num_frames: number of fd to be enqueued
280   *
281   * Return the number of fd enqueued, or a negative error number.
282   */
283 +static
284  int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
285                                       const struct qbman_eq_desc *d,
286                                       const struct dpaa2_fd *fd,
287                                       uint32_t *flags,
288                                       int num_frames)
289  {
290 -       int count = 0;
291 +       uint32_t *p = NULL;
292 +       const uint32_t *cl = (uint32_t *)d;
293 +       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
294 +       int i, num_enqueued = 0;
295 +       uint64_t addr_cena;
296 +
297 +       spin_lock(&s->access_spinlock);
298 +       half_mask = (s->eqcr.pi_ci_mask>>1);
299 +       full_mask = s->eqcr.pi_ci_mask;
300 +
301 +       if (!s->eqcr.available) {
302 +               eqcr_ci = s->eqcr.ci;
303 +               p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
304 +               s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
305 +
306 +               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
307 +                                       eqcr_ci, s->eqcr.ci);
308 +               if (!s->eqcr.available) {
309 +                       spin_unlock(&s->access_spinlock);
310 +                       return 0;
311 +               }
312 +       }
313  
314 -       while (count < num_frames) {
315 -               if (qbman_swp_enqueue_direct(s, d, fd) != 0)
316 -                       break;
317 -               count++;
318 +       eqcr_pi = s->eqcr.pi;
319 +       num_enqueued = (s->eqcr.available < num_frames) ?
320 +                       s->eqcr.available : num_frames;
321 +       s->eqcr.available -= num_enqueued;
322 +       /* Fill in the EQCR ring */
323 +       for (i = 0; i < num_enqueued; i++) {
324 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
325 +               /* Skip copying the verb */
326 +               memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
327 +               memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
328 +                      &fd[i], sizeof(*fd));
329 +               eqcr_pi++;
330         }
331  
332 -       return count;
333 +       dma_wmb();
334 +
335 +       /* Set the verb byte, have to substitute in the valid-bit */
336 +       eqcr_pi = s->eqcr.pi;
337 +       for (i = 0; i < num_enqueued; i++) {
338 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
339 +               p[0] = cl[0] | s->eqcr.pi_vb;
340 +               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
341 +                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
342 +
343 +                       d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
344 +                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
345 +               }
346 +               eqcr_pi++;
347 +               if (!(eqcr_pi & half_mask))
348 +                       s->eqcr.pi_vb ^= QB_VALID_BIT;
349 +       }
350 +
351 +       /* Flush all the cacheline without load/store in between */
352 +       eqcr_pi = s->eqcr.pi;
353 +       addr_cena = (size_t)s->addr_cena;
354 +       for (i = 0; i < num_enqueued; i++) {
355 +               dccvac((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
356 +               eqcr_pi++;
357 +       }
358 +       s->eqcr.pi = eqcr_pi & full_mask;
359 +       spin_unlock(&s->access_spinlock);
360 +
361 +       return num_enqueued;
362  }
363  
364  /**
365 @@ -700,26 +775,80 @@ int qbman_swp_enqueue_multiple_direct(st
366   * @s:  the software portal used for enqueue
367   * @d:  the enqueue descriptor
368   * @fd: table pointer of frame descriptor table to be enqueued
369 - * @flags: table pointer of flags, not used for the moment
370 + * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
371   * @num_frames: number of fd to be enqueued
372   *
373   * Return the number of fd enqueued, or a negative error number.
374   */
375 +static
376  int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
377 -                                     const struct qbman_eq_desc *d,
378 -                                     const struct dpaa2_fd *fd,
379 -                                     uint32_t *flags,
380 -                                     int num_frames)
381 -{
382 -       int count = 0;
383 +                                       const struct qbman_eq_desc *d,
384 +                                       const struct dpaa2_fd *fd,
385 +                                       uint32_t *flags,
386 +                                       int num_frames)
387 +{
388 +       uint32_t *p = NULL;
389 +       const uint32_t *cl = (uint32_t *)(d);
390 +       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
391 +       int i, num_enqueued = 0;
392 +       unsigned long irq_flags;
393 +
394 +       spin_lock(&s->access_spinlock);
395 +       local_irq_save(irq_flags);
396 +
397 +       half_mask = (s->eqcr.pi_ci_mask>>1);
398 +       full_mask = s->eqcr.pi_ci_mask;
399 +       if (!s->eqcr.available) {
400 +               eqcr_ci = s->eqcr.ci;
401 +               p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
402 +               s->eqcr.ci = __raw_readl(p) & full_mask;
403 +               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
404 +                                       eqcr_ci, s->eqcr.ci);
405 +               if (!s->eqcr.available) {
406 +                       local_irq_restore(irq_flags);
407 +                       spin_unlock(&s->access_spinlock);
408 +                       return 0;
409 +               }
410 +       }
411 +
412 +       eqcr_pi = s->eqcr.pi;
413 +       num_enqueued = (s->eqcr.available < num_frames) ?
414 +                       s->eqcr.available : num_frames;
415 +       s->eqcr.available -= num_enqueued;
416 +       /* Fill in the EQCR ring */
417 +       for (i = 0; i < num_enqueued; i++) {
418 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
419 +               /* Skip copying the verb */
420 +               memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
421 +               memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
422 +                      &fd[i], sizeof(*fd));
423 +               eqcr_pi++;
424 +       }
425  
426 -       while (count < num_frames) {
427 -               if (qbman_swp_enqueue_mem_back(s, d, fd) != 0)
428 -                       break;
429 -               count++;
430 +       /* Set the verb byte, have to substitute in the valid-bit */
431 +       eqcr_pi = s->eqcr.pi;
432 +       for (i = 0; i < num_enqueued; i++) {
433 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
434 +               p[0] = cl[0] | s->eqcr.pi_vb;
435 +               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
436 +                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
437 +
438 +                       d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
439 +                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
440 +               }
441 +               eqcr_pi++;
442 +               if (!(eqcr_pi & half_mask))
443 +                       s->eqcr.pi_vb ^= QB_VALID_BIT;
444         }
445 +       s->eqcr.pi = eqcr_pi & full_mask;
446 +
447 +       dma_wmb();
448 +       qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
449 +                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
450 +       local_irq_restore(irq_flags);
451 +       spin_unlock(&s->access_spinlock);
452  
453 -       return count;
454 +       return num_enqueued;
455  }
456  
457  /**
458 @@ -732,20 +861,69 @@ int qbman_swp_enqueue_multiple_mem_back(
459   *
460   * Return the number of fd enqueued, or a negative error number.
461   */
462 +static
463  int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
464                                            const struct qbman_eq_desc *d,
465                                            const struct dpaa2_fd *fd,
466                                            int num_frames)
467  {
468 -       int count = 0;
469 +       uint32_t *p;
470 +       const uint32_t *cl;
471 +       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
472 +       int i, num_enqueued = 0;
473 +       uint64_t addr_cena;
474 +
475 +       half_mask = (s->eqcr.pi_ci_mask>>1);
476 +       full_mask = s->eqcr.pi_ci_mask;
477 +       if (!s->eqcr.available) {
478 +               eqcr_ci = s->eqcr.ci;
479 +               p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
480 +               s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
481 +               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
482 +                                       eqcr_ci, s->eqcr.ci);
483 +               if (!s->eqcr.available)
484 +                       return 0;
485 +       }
486  
487 -       while (count < num_frames) {
488 -               if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0)
489 -                       break;
490 -               count++;
491 +       eqcr_pi = s->eqcr.pi;
492 +       num_enqueued = (s->eqcr.available < num_frames) ?
493 +                       s->eqcr.available : num_frames;
494 +       s->eqcr.available -= num_enqueued;
495 +       /* Fill in the EQCR ring */
496 +       for (i = 0; i < num_enqueued; i++) {
497 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
498 +               cl = (uint32_t *)(&d[i]);
499 +               /* Skip copying the verb */
500 +               memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
501 +               memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
502 +                      &fd[i], sizeof(*fd));
503 +               eqcr_pi++;
504         }
505  
506 -       return count;
507 +       dma_wmb();
508 +
509 +       /* Set the verb byte, have to substitute in the valid-bit */
510 +       eqcr_pi = s->eqcr.pi;
511 +       for (i = 0; i < num_enqueued; i++) {
512 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
513 +               cl = (uint32_t *)(&d[i]);
514 +               p[0] = cl[0] | s->eqcr.pi_vb;
515 +               eqcr_pi++;
516 +               if (!(eqcr_pi & half_mask))
517 +                       s->eqcr.pi_vb ^= QB_VALID_BIT;
518 +       }
519 +
520 +       /* Flush all the cacheline without load/store in between */
521 +       eqcr_pi = s->eqcr.pi;
522 +       addr_cena = (uint64_t)s->addr_cena;
523 +       for (i = 0; i < num_enqueued; i++) {
524 +               dccvac((uint64_t *)(addr_cena +
525 +                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
526 +               eqcr_pi++;
527 +       }
528 +       s->eqcr.pi = eqcr_pi & full_mask;
529 +
530 +       return num_enqueued;
531  }
532  
533  /**
534 @@ -758,20 +936,62 @@ int qbman_swp_enqueue_multiple_desc_dire
535   *
536   * Return the number of fd enqueued, or a negative error number.
537   */
538 +static
539  int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
540                                            const struct qbman_eq_desc *d,
541                                            const struct dpaa2_fd *fd,
542                                            int num_frames)
543  {
544 -       int count = 0;
545 +       uint32_t *p;
546 +       const uint32_t *cl;
547 +       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
548 +       int i, num_enqueued = 0;
549 +
550 +       half_mask = (s->eqcr.pi_ci_mask>>1);
551 +       full_mask = s->eqcr.pi_ci_mask;
552 +       if (!s->eqcr.available) {
553 +               eqcr_ci = s->eqcr.ci;
554 +               p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
555 +               s->eqcr.ci = __raw_readl(p) & full_mask;
556 +               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
557 +                                       eqcr_ci, s->eqcr.ci);
558 +               if (!s->eqcr.available)
559 +                       return 0;
560 +       }
561  
562 -       while (count < num_frames) {
563 -               if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0)
564 -                       break;
565 -               count++;
566 +       eqcr_pi = s->eqcr.pi;
567 +       num_enqueued = (s->eqcr.available < num_frames) ?
568 +                       s->eqcr.available : num_frames;
569 +       s->eqcr.available -= num_enqueued;
570 +       /* Fill in the EQCR ring */
571 +       for (i = 0; i < num_enqueued; i++) {
572 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
573 +               cl = (uint32_t *)(&d[i]);
574 +               /* Skip copying the verb */
575 +               memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
576 +               memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
577 +                      &fd[i], sizeof(*fd));
578 +               eqcr_pi++;
579         }
580  
581 -       return count;
582 +       /* Set the verb byte, have to substitute in the valid-bit */
583 +       eqcr_pi = s->eqcr.pi;
584 +       for (i = 0; i < num_enqueued; i++) {
585 +               p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
586 +               cl = (uint32_t *)(&d[i]);
587 +               p[0] = cl[0] | s->eqcr.pi_vb;
588 +               eqcr_pi++;
589 +               if (!(eqcr_pi & half_mask))
590 +                       s->eqcr.pi_vb ^= QB_VALID_BIT;
591 +       }
592 +
593 +       s->eqcr.pi = eqcr_pi & full_mask;
594 +
595 +       dma_wmb();
596 +       qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
597 +                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
598 +
599 +       return num_enqueued;
600  }
601  
602  /* Static (push) dequeue */
603 @@ -937,6 +1157,7 @@ void qbman_pull_desc_set_channel(struct
604   * Return 0 for success, and -EBUSY if the software portal is not ready
605   * to do pull dequeue.
606   */
607 +static
608  int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d)
609  {
610         struct qbman_pull_desc *p;
611 @@ -973,6 +1194,7 @@ int qbman_swp_pull_direct(struct qbman_s
612   * Return 0 for success, and -EBUSY if the software portal is not ready
613   * to do pull dequeue.
614   */
615 +static
616  int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d)
617  {
618         struct qbman_pull_desc *p;
619 @@ -991,6 +1213,8 @@ int qbman_swp_pull_mem_back(struct qbman
620         p->dq_src = d->dq_src;
621         p->rsp_addr = d->rsp_addr;
622         p->rsp_addr_virt = d->rsp_addr_virt;
623 +
624 +       /* Set the verb byte, have to substitute in the valid-bit */
625         p->verb = d->verb | s->vdq.valid_bit;
626         s->vdq.valid_bit ^= QB_VALID_BIT;
627         dma_wmb();
628 --- a/drivers/soc/fsl/dpio/qbman-portal.h
629 +++ b/drivers/soc/fsl/dpio/qbman-portal.h
630 @@ -143,6 +143,19 @@ struct qbman_swp {
631                 u8 dqrr_size;
632                 int reset_bug; /* indicates dqrr reset workaround is needed */
633         } dqrr;
634 +
635 +       struct {
636 +               u32 pi;
637 +               u32 pi_vb;
638 +               u32 pi_ring_size;
639 +               u32 pi_ci_mask;
640 +               u32 ci;
641 +               int available;
642 +               u32 pend;
643 +               u32 no_pfdr;
644 +       } eqcr;
645 +
646 +       spinlock_t access_spinlock;
647  };
648  
649  /* Function pointers */