aa8d0526ba3b9c2144a98dd8cb880d2d34e84e80
[oweals/openwrt.git] /
1 From 60f3db31d4cb785befed715b80c430f60f647701 Mon Sep 17 00:00:00 2001
2 From: yaroslavros <yaroslavros@gmail.com>
3 Date: Wed, 14 Aug 2019 15:22:55 +0100
4 Subject: [PATCH] Ported pcie-brcmstb bounce buffer implementation to
5  ARM64. (#3144)
6
7 Ported pcie-brcmstb bounce buffer implementation to ARM64.
8 This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
9
10 Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
11 ---
12  arch/arm64/mm/dma-mapping.c                   |  29 +
13  drivers/pci/controller/Makefile               |   3 +
14  drivers/pci/controller/pcie-brcmstb-bounce.h  |   2 +-
15  .../pci/controller/pcie-brcmstb-bounce64.c    | 569 ++++++++++++++++++
16  drivers/pci/controller/pcie-brcmstb.c         |  32 +-
17  5 files changed, 610 insertions(+), 25 deletions(-)
18  create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
19
20 --- a/arch/arm64/mm/dma-mapping.c
21 +++ b/arch/arm64/mm/dma-mapping.c
22 @@ -31,6 +31,35 @@ void arch_dma_prep_coherent(struct page
23  }
24  
25  #ifdef CONFIG_IOMMU_DMA
26 +/* Build a one-entry sg_table covering @size (page-aligned) bytes at @page. */
27 +static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
28 +                                     struct page *page, size_t size)
29 +{
30 +       int err = sg_alloc_table(sgt, 1, GFP_KERNEL);
31 +       if (err)
32 +               return err;
33 +       sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
34 +       return 0;
35 +}
36 +
37 +/* Map the buffer at @pfn into @vma, or return -ENXIO if @vma overruns it. */
38 +static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
39 +                             unsigned long pfn, size_t size)
40 +{
41 +       unsigned long buf_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
42 +       unsigned long want_pages = vma_pages(vma);
43 +       unsigned long first = vma->vm_pgoff;
44 +
45 +       /* The requested window must start inside the buffer... */
46 +       if (first >= buf_pages)
47 +               return -ENXIO;
48 +       /* ...and must not extend past its end. */
49 +       if (want_pages > buf_pages - first)
50 +               return -ENXIO;
51 +       return remap_pfn_range(vma, vma->vm_start, pfn + first,
52 +                              vma->vm_end - vma->vm_start, vma->vm_page_prot);
53 +}
54 +
55  void arch_teardown_dma_ops(struct device *dev)
56  {
57         dev->dma_ops = NULL;
58 --- a/drivers/pci/controller/Makefile
59 +++ b/drivers/pci/controller/Makefile
60 @@ -33,6 +33,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
61  ifdef CONFIG_ARM
62  obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
63  endif
64 +ifdef CONFIG_ARM64
65 +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
66 +endif
67  
68  obj-$(CONFIG_VMD) += vmd.o
69  # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
70 --- a/drivers/pci/controller/pcie-brcmstb-bounce.h
71 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
72 @@ -6,7 +6,7 @@
73  #ifndef _PCIE_BRCMSTB_BOUNCE_H
74  #define _PCIE_BRCMSTB_BOUNCE_H
75  
76 -#ifdef CONFIG_ARM
77 +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
78  
79  int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
80                           dma_addr_t threshold);
81 --- /dev/null
82 +++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
83 @@ -0,0 +1,569 @@
84 +/*
85 + *  This code started out as a version of arch/arm/common/dmabounce.c,
86 + *  modified to cope with highmem pages. Now it has been changed heavily -
87 + *  it now preallocates a large block (currently 4MB) and carves it up
88 + *  sequentially in ring fashion, and DMA is used to copy the data - to the
89 + *  point where very little of the original remains.
90 + *
91 + *  Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
92 + *
93 + *  Original version by Brad Parker (brad@heeltoe.com)
94 + *  Re-written by Christopher Hoover <ch@murgatroid.com>
95 + *  Made generic by Deepak Saxena <dsaxena@plexity.net>
96 + *
97 + *  Copyright (C) 2002 Hewlett Packard Company.
98 + *  Copyright (C) 2004 MontaVista Software, Inc.
99 + *
100 + *  This program is free software; you can redistribute it and/or
101 + *  modify it under the terms of the GNU General Public License
102 + *  version 2 as published by the Free Software Foundation.
103 + */
104 +
105 +#include <linux/module.h>
106 +#include <linux/init.h>
107 +#include <linux/slab.h>
108 +#include <linux/page-flags.h>
109 +#include <linux/device.h>
110 +#include <linux/dma-mapping.h>
111 +#include <linux/dma-direct.h>
112 +#include <linux/dma-noncoherent.h>
113 +#include <linux/dmapool.h>
114 +#include <linux/list.h>
115 +#include <linux/scatterlist.h>
116 +#include <linux/bitmap.h>
117 +#include <linux/swiotlb.h>
118 +
119 +#include <asm/cacheflush.h>
120 +
121 +#define STATS
122 +/* DO_STATS(X) runs statement X when STATS is defined and is a no-op otherwise. */
123 +#ifdef STATS
124 +#define DO_STATS(X) do { X ; } while (0)
125 +#else
126 +#define DO_STATS(X) do { } while (0)
127 +#endif
128 +
129 +/* ************************************************** */
130 +
131 +struct safe_buffer {                   /* one live bounce mapping */
132 +       struct list_head node;          /* link in device_info->safe_buffers */
133 +
134 +       /* original request, as passed to alloc_safe_buffer() */
135 +       size_t          size;
136 +       int             direction;
137 +
138 +       struct dmabounce_pool *pool;    /* pool the bounce pages came from */
139 +       void            *safe;          /* CPU address returned by bounce_alloc() */
140 +       dma_addr_t      unsafe_dma_addr; /* DMA address of the caller's buffer */
141 +       dma_addr_t      safe_dma_addr;  /* DMA address of the bounce pages */
142 +};
143 +
144 +struct dmabounce_pool {                /* page-granular bounce memory pool */
145 +       unsigned long   pages;          /* total pages in the pool */
146 +       void            *virt_addr;     /* CPU address from dma_alloc_coherent() */
147 +       dma_addr_t      dma_addr;       /* DMA address from dma_alloc_coherent() */
148 +       unsigned long   *alloc_map;     /* bitmap, one bit per pool page */
149 +       unsigned long   alloc_pos;      /* page index where the next search starts */
150 +       spinlock_t      lock;           /* held around alloc_map/alloc_pos/num_pages updates */
151 +       struct device   *dev;           /* device the pool was allocated for */
152 +       unsigned long   num_pages;      /* pages currently handed out */
153 +#ifdef STATS
154 +       size_t          max_size;       /* largest request size seen by bounce_alloc() */
155 +       unsigned long   num_bufs;       /* buffers currently handed out */
156 +       unsigned long   max_bufs;       /* high-water mark of num_bufs */
157 +       unsigned long   max_pages;      /* high-water mark of num_pages */
158 +#endif
159 +};
160 +
161 +struct dmabounce_device_info {         /* state of the single bounce client */
162 +       struct device *dev;             /* device passed to brcm_pcie_bounce_init() */
163 +       dma_addr_t threshold;           /* mappings ending above this are bounced */
164 +       struct list_head safe_buffers;  /* live struct safe_buffer entries */
165 +       struct dmabounce_pool pool;     /* backing memory for the bounce buffers */
166 +       rwlock_t lock;                  /* held around safe_buffers list access */
167 +#ifdef STATS
168 +       unsigned long map_count;        /* bounced dmabounce_map_page() calls */
169 +       unsigned long unmap_count;      /* bounced dmabounce_unmap_page() calls */
170 +       unsigned long sync_dev_count;   /* bounced dmabounce_sync_for_device() calls */
171 +       unsigned long sync_cpu_count;   /* bounced dmabounce_sync_for_cpu() calls */
172 +       unsigned long fail_count;       /* alloc_safe_buffer() failures in map_page */
173 +       int attr_res;                   /* device_create_file() result for the stats file */
174 +#endif
175 +};
176 +
177 +static struct dmabounce_device_info *g_dmabounce_device_info; /* set by init, cleared by uninit */
178 +/* Declared here, defined outside this file. */
179 +extern int bcm2838_dma40_memcpy_init(void);
180 +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
181 +
182 +#ifdef STATS
183 +/* sysfs "dmabounce_stats" show handler: one line of map/sync/pool counters. */
184 +static ssize_t
185 +bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
186 +{
187 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
188 +       /* No pool registered (e.g. a read racing init or uninit): print nothing. */
189 +       if (!device_info)
190 +               return 0;
191 +       return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
192 +               device_info->map_count, device_info->unmap_count,
193 +               device_info->sync_dev_count, device_info->sync_cpu_count,
194 +               device_info->fail_count, device_info->pool.max_size,
195 +               device_info->pool.num_bufs, device_info->pool.max_bufs,
196 +               device_info->pool.num_pages * PAGE_SIZE,
197 +               device_info->pool.max_pages * PAGE_SIZE);
198 +}
199 +
200 +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
201 +#endif
202 +
203 +/* Set up @pool with enough whole pages to hold @buffer_size bytes. */
204 +static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
205 +                        unsigned long buffer_size)
206 +{
207 +       /* Round the requested size up to a whole number of pages. */
208 +       pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
209 +       /* One bit per page tracks which pages are handed out. */
210 +       pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
211 +       if (!pool->alloc_map)
212 +               return -ENOMEM;
213 +
214 +       pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
215 +                                            &pool->dma_addr, GFP_KERNEL);
216 +       if (!pool->virt_addr) {
217 +               bitmap_free(pool->alloc_map);
218 +               return -ENOMEM;
219 +       }
220 +
221 +       spin_lock_init(&pool->lock);
222 +       pool->dev = dev;
223 +       pool->alloc_pos = 0;
224 +       pool->num_pages = 0;
225 +
226 +       DO_STATS(pool->max_size = 0);
227 +       DO_STATS(pool->num_bufs = 0);
228 +       DO_STATS(pool->max_bufs = 0);
229 +       DO_STATS(pool->max_pages = 0);
230 +
231 +       return 0;
232 +}
233 +
234 +static void bounce_destroy(struct dmabounce_pool *pool)
235 +{
236 +       dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
237 +                         pool->dma_addr);
238 +       /* Undoes bounce_create(): coherent buffer above, allocation bitmap below. */
239 +       bitmap_free(pool->alloc_map);
240 +}
241 +
242 +/* Reserve whole pages for @size bytes; NULL if no free run is large enough. */
243 +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
244 +                         dma_addr_t *dmaaddrp)
245 +{
246 +       unsigned long npages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
247 +       unsigned long first, irqflags;
248 +       void *vaddr = NULL;
249 +
250 +       DO_STATS(pool->max_size = max(size, pool->max_size));
251 +
252 +       spin_lock_irqsave(&pool->lock, irqflags);
253 +
254 +       /* Search from the ring position; retry from page 0 on a miss. */
255 +       first = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
256 +                                          pool->alloc_pos, npages, 0);
257 +       if (first >= pool->pages && pool->alloc_pos)
258 +               first = bitmap_find_next_zero_area(pool->alloc_map,
259 +                                                  pool->pages, 0, npages, 0);
260 +       if (first >= pool->pages)
261 +               goto out_unlock;
262 +
263 +       bitmap_set(pool->alloc_map, first, npages);
264 +       pool->alloc_pos = (first + npages) % pool->pages;
265 +       pool->num_pages += npages;
266 +
267 +       DO_STATS(pool->num_bufs++);
268 +       DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
269 +       DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
270 +
271 +       /* Both addresses are offset by the same page index into the pool. */
272 +       *dmaaddrp = pool->dma_addr + first * PAGE_SIZE;
273 +       vaddr = pool->virt_addr + first * PAGE_SIZE;
274 +
275 +out_unlock:
276 +       spin_unlock_irqrestore(&pool->lock, irqflags);
277 +
278 +       return vaddr;
279 +}
280 +
281 +/* Return the pages backing @buf (a @size-byte bounce_alloc() result). */
282 +static void
283 +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
284 +{
285 +       unsigned long npages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
286 +       unsigned long byte_off = buf - pool->virt_addr;
287 +       unsigned long irqflags;
288 +
289 +       /* bounce_alloc() only hands out page-aligned addresses. */
290 +       BUG_ON(byte_off & (PAGE_SIZE - 1));
291 +
292 +       spin_lock_irqsave(&pool->lock, irqflags);
293 +       bitmap_clear(pool->alloc_map, byte_off/PAGE_SIZE, npages);
294 +       pool->num_pages -= npages;
295 +       /* An empty pool restarts allocation from page 0. */
296 +       if (!pool->num_pages)
297 +               pool->alloc_pos = 0;
298 +       DO_STATS(pool->num_bufs--);
299 +       spin_unlock_irqrestore(&pool->lock, irqflags);
300 +}
301 +
302 +/*
303 + * Create a tracking record plus bounce pages for one mapping and add the
304 + * record to the device's safe_buffers list.  Returns NULL on failure.
305 + */
306 +static struct safe_buffer *
307 +alloc_safe_buffer(struct dmabounce_device_info *device_info,
308 +                 dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
309 +{
310 +       struct device *dev = device_info->dev;
311 +       struct safe_buffer *sb;
312 +       unsigned long irqflags;
313 +
314 +       /*
315 +        * GFP_ATOMIC is deliberate: callers look like thread context, but
316 +        * GFP_KERNEL here led to hard-to-debug lockups, and choosing the
317 +        * mode with in_atomic() is unsafe.
318 +        */
319 +       sb = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
320 +       if (!sb) {
321 +               dev_warn(dev, "%s: kmalloc failed\n", __func__);
322 +               return NULL;
323 +       }
324 +
325 +       sb->unsafe_dma_addr = dma_addr;
326 +       sb->size = size;
327 +       sb->direction = dir;
328 +       sb->pool = &device_info->pool;
329 +       sb->safe = bounce_alloc(sb->pool, size, &sb->safe_dma_addr);
330 +       if (!sb->safe) {
331 +               dev_warn(dev,
332 +                        "%s: could not alloc dma memory (size=%zu)\n",
333 +                        __func__, size);
334 +               kfree(sb);
335 +               return NULL;
336 +       }
337 +
338 +       /* Publish the record so find_safe_buffer() can see it. */
339 +       write_lock_irqsave(&device_info->lock, irqflags);
340 +       list_add(&sb->node, &device_info->safe_buffers);
341 +       write_unlock_irqrestore(&device_info->lock, irqflags);
342 +
343 +       return sb;
344 +}
345 +
346 +/* Return the tracked buffer whose DMA range contains the address, or NULL. */
347 +static struct safe_buffer *
348 +find_safe_buffer(struct dmabounce_device_info *device_info,
349 +                dma_addr_t safe_dma_addr)
350 +{
351 +       struct safe_buffer *cur, *match = NULL;
352 +       unsigned long irqflags;
353 +
354 +       read_lock_irqsave(&device_info->lock, irqflags);
355 +       list_for_each_entry(cur, &device_info->safe_buffers, node) {
356 +               /* Skip entries that start after it or end at/before it. */
357 +               if (safe_dma_addr < cur->safe_dma_addr ||
358 +                   safe_dma_addr >= cur->safe_dma_addr + cur->size)
359 +                       continue;
360 +               match = cur;
361 +               break;
362 +       }
363 +       read_unlock_irqrestore(&device_info->lock, irqflags);
364 +       return match;
365 +}
366 +
367 +/* Stop tracking @buf, give its pages back to the pool and free the record. */
368 +static void
369 +free_safe_buffer(struct dmabounce_device_info *device_info,
370 +                struct safe_buffer *buf)
371 +{
372 +       unsigned long irqflags;
373 +
374 +       write_lock_irqsave(&device_info->lock, irqflags);
375 +       list_del(&buf->node);
376 +       write_unlock_irqrestore(&device_info->lock, irqflags);
377 +       /* buf->size is the size that was passed to bounce_alloc(). */
378 +       bounce_free(buf->pool, buf->safe, buf->size);
379 +       kfree(buf);
380 +}
381 +
382 +/* ************************************************** */
383 +
384 +/* Look up @dma_addr in the bounce list; @where names the caller for the log. */
385 +static struct safe_buffer *
386 +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
387 +{
388 +       if (!(dev && g_dmabounce_device_info))
389 +               return NULL;
390 +       if (!dma_mapping_error(dev, dma_addr))
391 +               return find_safe_buffer(g_dmabounce_device_info, dma_addr);
392 +       dev_err(dev, "Trying to %s invalid mapping\n", where);
393 +       return NULL;
394 +}
395 +
396 +/* Copy caller data into the bounce buffer when the device will read it. */
397 +static dma_addr_t
398 +map_single(struct device *dev, struct safe_buffer *buf, size_t size,
399 +          enum dma_data_direction dir, unsigned long attrs)
400 +{
401 +       const dma_addr_t src = buf->unsafe_dma_addr;
402 +       const dma_addr_t dst = buf->safe_dma_addr;
403 +
404 +       BUG_ON(buf->size != size);
405 +       BUG_ON(buf->direction != dir);
406 +       dev_dbg(dev, "map: %llx->%llx\n", (u64)src, (u64)dst);
407 +       if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
408 +               return dst;
409 +       if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
410 +               bcm2838_dma40_memcpy(dst, src, size);
411 +       return dst;
412 +}
413 +
414 +/* Copy device-written data from the bounce buffer back to the caller's. */
415 +static dma_addr_t
416 +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
417 +            enum dma_data_direction dir, unsigned long attrs)
418 +{
419 +       const bool to_cpu = dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL;
420 +
421 +       BUG_ON(buf->size != size);
422 +       BUG_ON(buf->direction != dir);
423 +       if (to_cpu && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
424 +               dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
425 +                       (u64)buf->unsafe_dma_addr);
426 +               bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
427 +                                    size);
428 +       }
429 +       return buf->unsafe_dma_addr;
430 +}
431 +
432 +/* ************************************************** */
433 +
434 +/*
435 + * Map @size bytes at @offset in @page for DMA.  If a bounce pool exists and
436 + * the buffer ends above its threshold, allocate a 'safe' buffer, copy the
437 + * data into it when @dir/@attrs require (see map_single()), and return the
438 + * safe buffer's DMA address instead.  Returns ~(dma_addr_t)0 if that fails.
439 + */
440 +static dma_addr_t
441 +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
442 +                  size_t size, enum dma_data_direction dir,
443 +                  unsigned long attrs)
444 +{
445 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
446 +       dma_addr_t dma_addr;
447 +
448 +       dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
449 +       /* Sync the original buffer for the device before any bounce copy. */
450 +       dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
451 +        if (!dev_is_dma_coherent(dev))
452 +               __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
453 +       /* Bounce only when the end of the buffer lies above the threshold. */
454 +       if (device_info && (dma_addr + size) > device_info->threshold) {
455 +               struct safe_buffer *buf;
456 +
457 +               buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
458 +               if (!buf) {
459 +                       DO_STATS(device_info->fail_count++);
460 +                       return (~(dma_addr_t)0x0);
461 +               }
462 +
463 +               DO_STATS(device_info->map_count++);
464 +               /* From here on the caller sees the bounce buffer's address. */
465 +               dma_addr = map_single(dev, buf, size, dir, attrs);
466 +       }
467 +       return dma_addr;
468 +}
469 +
470 +/*
471 + * Undo dmabounce_map_page().  If @dma_addr belongs to a tracked 'safe'
472 + * buffer, copy its data back to the original buffer when @dir/@attrs
473 + * require (see unmap_single()) and free the safe buffer; then sync the
474 + * original buffer for the CPU.
475 + */
476 +static void
477 +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
478 +                    enum dma_data_direction dir, unsigned long attrs)
479 +{
480 +       struct safe_buffer *buf;
481 +
482 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
483 +       if (buf) {
484 +               DO_STATS(g_dmabounce_device_info->unmap_count++);
485 +               dma_addr = unmap_single(dev, buf, size, dir, attrs);
486 +               free_safe_buffer(g_dmabounce_device_info, buf);
487 +       }
488 +       /* dma_addr now names the original buffer, whether or not it was bounced. */
489 +        if (!dev_is_dma_coherent(dev))
490 +               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
491 +       dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
492 +}
493 +
494 +/*
495 + * Streaming counterpart of dmabounce_map_page for an existing mapping: syncs
496 + * @dma_addr for the device and, if it is a bounce buffer, re-runs map_single().
497 + */
498 +static void
499 +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
500 +                         enum dma_data_direction dir)
501 +{
502 +       struct safe_buffer *buf;
503 +
504 +        dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
505 +        if (!dev_is_dma_coherent(dev))
506 +                __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
507 +       /* map_single() BUG_ONs unless @size and @dir match the original mapping. */
508 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
509 +       if (buf) {
510 +               DO_STATS(g_dmabounce_device_info->sync_dev_count++);
511 +               map_single(dev, buf, size, dir, 0);
512 +       }
513 +}
514 +
515 +/*
516 + * Streaming counterpart of dmabounce_unmap_page that keeps the mapping: runs
517 + * unmap_single() for a bounce buffer, then syncs the original for the CPU.
518 + */
519 +static void
520 +dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
521 +                      size_t size, enum dma_data_direction dir)
522 +{
523 +       struct safe_buffer *buf;
524 +
525 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
526 +       if (buf) {
527 +               DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
528 +               dma_addr = unmap_single(dev, buf, size, dir, 0);
529 +       }
530 +       /* For a bounced mapping dma_addr is now the original buffer's address. */
531 +        if (!dev_is_dma_coherent(dev))
532 +                __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
533 +        dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
534 +}
535 +
536 +/* Defer to dma_direct_supported() unless a bounce pool is registered (then 0). */
537 +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
538 +{
539 +       if (!g_dmabounce_device_info)
540 +               return dma_direct_supported(dev, dma_mask);
541 +       return 0;
542 +}
543 +
544 +static const struct dma_map_ops dmabounce_ops = {      /* installed by brcm_pcie_bounce_register_dev() */
545 +       .alloc                  = dma_direct_alloc,
546 +       .free                   = dma_direct_free,
547 +       .map_page               = dmabounce_map_page,
548 +       .unmap_page             = dmabounce_unmap_page,
549 +       .sync_single_for_cpu    = dmabounce_sync_for_cpu,
550 +       .sync_single_for_device = dmabounce_sync_for_device,
551 +       .map_sg                 = dma_direct_map_sg,            /* NOTE(review): the sg callbacks */
552 +       .unmap_sg               = dma_direct_unmap_sg,          /* go straight to dma_direct_*, so */
553 +       .sync_sg_for_cpu        = dma_direct_sync_sg_for_cpu,   /* they appear to bypass the bounce */
554 +       .sync_sg_for_device     = dma_direct_sync_sg_for_device, /* pool - confirm this is intended */
555 +       .dma_supported          = dmabounce_dma_supported,
556 +};
557 +
558 +/*
559 + * Register @dev as the single bounce client: allocate a @buffer_size byte
560 + * pool and bounce any mapping that ends above @threshold.
561 + * Returns 0, -EBUSY if a client already exists, or a negative errno.
562 + */
563 +int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
564 +                         dma_addr_t threshold)
565 +{
566 +       struct dmabounce_device_info *device_info;
567 +       int ret;
568 +
569 +       /* Only support a single client */
570 +       if (g_dmabounce_device_info)
571 +               return -EBUSY;
572 +
573 +       ret = bcm2838_dma40_memcpy_init();
574 +       if (ret)
575 +               return ret;
576 +
577 +       device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
578 +       if (!device_info) {
579 +               dev_err(dev, "Could not allocate dmabounce_device_info\n");
580 +               return -ENOMEM;
581 +       }
582 +
583 +       ret = bounce_create(&device_info->pool, dev, buffer_size);
584 +       if (ret) {
585 +               dev_err(dev,
586 +                       "dmabounce: could not allocate %lu byte DMA pool\n",
587 +                       buffer_size);
588 +               kfree(device_info);
589 +               return ret;
590 +       }
591 +
592 +       device_info->dev = dev;
593 +       device_info->threshold = threshold;
594 +       INIT_LIST_HEAD(&device_info->safe_buffers);
595 +       rwlock_init(&device_info->lock);
596 +       DO_STATS(device_info->map_count = 0);
597 +       DO_STATS(device_info->unmap_count = 0);
598 +       DO_STATS(device_info->sync_dev_count = 0);
599 +       DO_STATS(device_info->sync_cpu_count = 0);
600 +       DO_STATS(device_info->fail_count = 0);
601 +
602 +       /* Publish before the stats file appears, so a reader never sees NULL. */
603 +       g_dmabounce_device_info = device_info;
604 +       DO_STATS(device_info->attr_res =
605 +                device_create_file(dev, &dev_attr_dmabounce_stats));
606 +
607 +       /* Success is informational, so it is not logged at error level. */
608 +       dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
609 +                buffer_size / 1024, &threshold);
610 +       return 0;
611 +}
612 +EXPORT_SYMBOL(brcm_pcie_bounce_init);
613 +
614 +/* Tear down the bounce state created by brcm_pcie_bounce_init(). */
615 +void brcm_pcie_bounce_uninit(struct device *dev)
616 +{
617 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
618 +
619 +       if (!device_info) {
620 +               dev_warn(dev,
621 +                        "Never registered with dmabounce but attempting "
622 +                        "to unregister!\n");
623 +               return;
624 +       }
625 +
626 +       /* Drop the stats file while the global it reads is still valid. */
627 +       DO_STATS(if (device_info->attr_res == 0)
628 +                        device_remove_file(dev, &dev_attr_dmabounce_stats));
629 +       g_dmabounce_device_info = NULL;
630 +
631 +       if (!list_empty(&device_info->safe_buffers)) {
632 +               dev_err(dev,
633 +                       "Removing from dmabounce with pending buffers!\n");
634 +               BUG();
635 +       }
636 +
637 +       bounce_destroy(&device_info->pool);
638 +       kfree(device_info);
639 +}
640 +EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
641 +
642 +int brcm_pcie_bounce_register_dev(struct device *dev)
643 +{
644 +       set_dma_ops(dev, &dmabounce_ops);
645 +       /* @dev now uses dmabounce_ops; there is no failure path, so always 0. */
646 +       return 0;
647 +}
648 +EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
649 +
650 +MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
651 +MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
652 +MODULE_LICENSE("GPL");
653 --- a/drivers/pci/controller/pcie-brcmstb.c
654 +++ b/drivers/pci/controller/pcie-brcmstb.c
655 @@ -611,28 +611,6 @@ static const struct dma_map_ops brcm_dma
656  
657  static void brcm_set_dma_ops(struct device *dev)
658  {
659 -       int ret;
660 -
661 -       if (IS_ENABLED(CONFIG_ARM64)) {
662 -               /*
663 -                * We are going to invoke get_dma_ops().  That
664 -                * function, at this point in time, invokes
665 -                * get_arch_dma_ops(), and for ARM64 that function
666 -                * returns a pointer to dummy_dma_ops.  So then we'd
667 -                * like to call arch_setup_dma_ops(), but that isn't
668 -                * exported.  Instead, we call of_dma_configure(),
669 -                * which is exported, and this calls
670 -                * arch_setup_dma_ops().  Once we do this the call to
671 -                * get_dma_ops() will work properly because
672 -                * dev->dma_ops will be set.
673 -                */
674 -               ret = of_dma_configure(dev, dev->of_node, true);
675 -               if (ret) {
676 -                       dev_err(dev, "of_dma_configure() failed: %d\n", ret);
677 -                       return;
678 -               }
679 -       }
680 -
681         arch_dma_ops = get_dma_ops(dev);
682         if (!arch_dma_ops) {
683                 dev_err(dev, "failed to get arch_dma_ops\n");
684 @@ -651,12 +629,12 @@ static int brcmstb_platform_notifier(str
685         extern unsigned long max_pfn;
686         struct device *dev = __dev;
687         const char *rc_name = "0000:00:00.0";
688 +       int ret;
689  
690         switch (event) {
691         case BUS_NOTIFY_ADD_DEVICE:
692                 if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
693                     strcmp(dev->kobj.name, rc_name)) {
694 -                       int ret;
695  
696                         ret = brcm_pcie_bounce_register_dev(dev);
697                         if (ret) {
698 @@ -665,8 +643,14 @@ static int brcmstb_platform_notifier(str
699                                         ret);
700                                 return ret;
701                         }
702 -                       brcm_set_dma_ops(dev);
703 +               } else if (IS_ENABLED(CONFIG_ARM64)) {
704 +                       ret = of_dma_configure(dev, dev->of_node, true);
705 +                       if (ret) {
706 +                               dev_err(dev, "of_dma_configure() failed: %d\n", ret);
707 +                               return ret;
708 +                       }
709                 }
710 +               brcm_set_dma_ops(dev);
711                 return NOTIFY_OK;
712  
713         case BUS_NOTIFY_DEL_DEVICE: