df682766c389fba4e76f7a5c0cb594ee4b2d9bbe
[oweals/openwrt.git] /
1 From 2f9799e1c9bd7f03792a89ff7c9d55f0c570d661 Mon Sep 17 00:00:00 2001
2 From: yaroslavros <yaroslavros@gmail.com>
3 Date: Wed, 14 Aug 2019 15:22:55 +0100
4 Subject: [PATCH 768/773] Ported pcie-brcmstb bounce buffer implementation to
5  ARM64. (#3144)
6
7 Ported pcie-brcmstb bounce buffer implementation to ARM64.
8 This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
9
10 Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
11 ---
12  arch/arm64/include/asm/dma-mapping.h          |  21 +
13  arch/arm64/mm/dma-mapping.c                   |  50 ++
14  drivers/pci/controller/Makefile               |   3 +
15  drivers/pci/controller/pcie-brcmstb-bounce.h  |   2 +-
16  .../pci/controller/pcie-brcmstb-bounce64.c    | 576 ++++++++++++++++++
17  drivers/pci/controller/pcie-brcmstb.c         |  30 +-
18  6 files changed, 658 insertions(+), 24 deletions(-)
19  create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
20
21 --- a/arch/arm64/include/asm/dma-mapping.h
22 +++ b/arch/arm64/include/asm/dma-mapping.h
23 @@ -24,6 +24,27 @@
24  #include <xen/xen.h>
25  #include <asm/xen/hypervisor.h>
26  
27 +extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
28 +                          gfp_t gfp, unsigned long attrs);     /* -> __dma_alloc() */
29 +extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
30 +                        dma_addr_t handle, unsigned long attrs);       /* -> __dma_free() */
31 +extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
32 +                       void *cpu_addr, dma_addr_t dma_addr, size_t size,
33 +                       unsigned long attrs);   /* -> __swiotlb_mmap() */
34 +extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
35 +               void *cpu_addr, dma_addr_t dma_addr, size_t size,
36 +               unsigned long attrs);   /* -> __swiotlb_get_sgtable() */
37 +extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
38 +               enum dma_data_direction dir, unsigned long attrs);
39 +extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
40 +               enum dma_data_direction dir, unsigned long attrs);
41 +extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
42 +               enum dma_data_direction dir);
43 +extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
44 +               enum dma_data_direction dir);
45 +
46 +
47 +
48  extern const struct dma_map_ops dummy_dma_ops;
49  
50  static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
51 --- a/arch/arm64/mm/dma-mapping.c
52 +++ b/arch/arm64/mm/dma-mapping.c
53 @@ -138,6 +138,12 @@ no_mem:
54         return NULL;
55  }
56  
57 +void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
58 +                           gfp_t gfp, unsigned long attrs)
59 +{ /* Non-static entry point: forwards every argument unchanged to __dma_alloc(). */
60 +        return __dma_alloc(dev, size, handle, gfp, attrs);
61 +}
62 +
63  static void __dma_free(struct device *dev, size_t size,
64                        void *vaddr, dma_addr_t dma_handle,
65                        unsigned long attrs)
66 @@ -154,6 +160,12 @@ static void __dma_free(struct device *de
67         swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
68  }
69  
70 +void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
71 +                         dma_addr_t handle, unsigned long attrs)
72 +{ /* Non-static entry point: forwards every argument unchanged to __dma_free(). */
73 +        __dma_free(dev, size, cpu_addr, handle, attrs);
74 +}
75 +
76  static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
77                                      unsigned long offset, size_t size,
78                                      enum dma_data_direction dir,
79 @@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct
80         return ret;
81  }
82  
83 +int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
84 +                enum dma_data_direction dir, unsigned long attrs)
85 +{ /* Non-static entry point: returns whatever __swiotlb_map_sg_attrs() returns. */
86 +       return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
87 +}
88 +
89  static void __swiotlb_unmap_sg_attrs(struct device *dev,
90                                      struct scatterlist *sgl, int nelems,
91                                      enum dma_data_direction dir,
92 @@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(str
93         swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
94  }
95  
96 +void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
97 +                enum dma_data_direction dir, unsigned long attrs)
98 +{ /* Non-static entry point: forwards unchanged to __swiotlb_unmap_sg_attrs(). */
99 +       __swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
100 +}
101 +
102  static void __swiotlb_sync_single_for_cpu(struct device *dev,
103                                           dma_addr_t dev_addr, size_t size,
104                                           enum dma_data_direction dir)
105 @@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(st
106         swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
107  }
108  
109 +void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
110 +                enum dma_data_direction dir)
111 +{ /* Non-static entry point: forwards unchanged to __swiotlb_sync_sg_for_cpu(). */
112 +       __swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
113 +}
114 +
115  static void __swiotlb_sync_sg_for_device(struct device *dev,
116                                          struct scatterlist *sgl, int nelems,
117                                          enum dma_data_direction dir)
118 @@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device
119                                        sg->length, dir);
120  }
121  
122 +void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
123 +                enum dma_data_direction dir)
124 +{ /* Non-static entry point: forwards unchanged to __swiotlb_sync_sg_for_device(). */
125 +       __swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
126 +}
127 +
128  static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
129                               unsigned long pfn, size_t size)
130  {
131 @@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device
132         return __swiotlb_mmap_pfn(vma, pfn, size);
133  }
134  
135 +int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
136 +                        void *cpu_addr, dma_addr_t dma_addr, size_t size,
137 +                        unsigned long attrs)
138 +{ /* Non-static entry point: returns whatever __swiotlb_mmap() returns. */
139 +       return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
140 +}
141 +
142  static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
143                                       struct page *page, size_t size)
144  {
145 @@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct
146         return __swiotlb_get_sgtable_page(sgt, page, size);
147  }
148  
149 +int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
150 +                void *cpu_addr, dma_addr_t dma_addr, size_t size,
151 +                unsigned long attrs)
152 +{ /* Non-static entry point: returns whatever __swiotlb_get_sgtable() returns. */
153 +       return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
154 +}
155 +
156  static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
157  {
158         if (swiotlb)
159 --- a/drivers/pci/controller/Makefile
160 +++ b/drivers/pci/controller/Makefile
161 @@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
162  ifdef CONFIG_ARM
163  obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
164  endif
165 +ifdef CONFIG_ARM64
166 +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
167 +endif
168  
169  obj-$(CONFIG_VMD) += vmd.o
170  # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
171 --- a/drivers/pci/controller/pcie-brcmstb-bounce.h
172 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
173 @@ -6,7 +6,7 @@
174  #ifndef _PCIE_BRCMSTB_BOUNCE_H
175  #define _PCIE_BRCMSTB_BOUNCE_H
176  
177 -#ifdef CONFIG_ARM
178 +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
179  
180  int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
181                           dma_addr_t threshold);
182 --- /dev/null
183 +++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
184 @@ -0,0 +1,576 @@
185 +/*
186 + *  This code started out as a version of arch/arm/common/dmabounce.c,
187 + *  modified to cope with highmem pages. Now it has been changed heavily -
188 + *  it now preallocates a large block (currently 4MB) and carves it up
189 + *  sequentially in ring fashion, and DMA is used to copy the data - to the
190 + *  point where very little of the original remains.
191 + *
192 + *  Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
193 + *
194 + *  Original version by Brad Parker (brad@heeltoe.com)
195 + *  Re-written by Christopher Hoover <ch@murgatroid.com>
196 + *  Made generic by Deepak Saxena <dsaxena@plexity.net>
197 + *
198 + *  Copyright (C) 2002 Hewlett Packard Company.
199 + *  Copyright (C) 2004 MontaVista Software, Inc.
200 + *
201 + *  This program is free software; you can redistribute it and/or
202 + *  modify it under the terms of the GNU General Public License
203 + *  version 2 as published by the Free Software Foundation.
204 + */
205 +
206 +#include <linux/module.h>
207 +#include <linux/init.h>
208 +#include <linux/slab.h>
209 +#include <linux/page-flags.h>
210 +#include <linux/device.h>
211 +#include <linux/dma-mapping.h>
212 +#include <linux/dma-direct.h>
213 +#include <linux/dmapool.h>
214 +#include <linux/list.h>
215 +#include <linux/scatterlist.h>
216 +#include <linux/bitmap.h>
217 +#include <linux/swiotlb.h>
218 +
219 +#include <asm/cacheflush.h>
220 +
221 +#define STATS
222 +
223 +#ifdef STATS
224 +#define DO_STATS(X) do { X ; } while (0)
225 +#else
226 +#define DO_STATS(X) do { } while (0)
227 +#endif
228 +
229 +/* ************************************************** */
230 +/* Bookkeeping for one live bounce buffer; filled in by alloc_safe_buffer(). */
231 +struct safe_buffer {
232 +       struct list_head node;  /* entry in dmabounce_device_info.safe_buffers */
233 +
234 +       /* original request, exactly as passed to alloc_safe_buffer() */
235 +       size_t          size;
236 +       int             direction;
237 +
238 +       struct dmabounce_pool *pool;    /* pool that 'safe' was carved from */
239 +       void            *safe;  /* CPU address returned by bounce_alloc() */
240 +       dma_addr_t      unsafe_dma_addr;        /* address the caller asked to map */
241 +       dma_addr_t      safe_dma_addr;  /* DMA address of the pool pages in use */
242 +};
243 +/* Page-granular allocator layered over one dma_alloc_coherent() region. */
244 +struct dmabounce_pool {
245 +       unsigned long   pages;          /* total number of pages in the pool */
246 +       void            *virt_addr;     /* CPU address of the coherent region */
247 +       dma_addr_t      dma_addr;       /* DMA address of the coherent region */
248 +       unsigned long   *alloc_map;     /* bitmap, one bit per page; set = in use */
249 +       unsigned long   alloc_pos;      /* page index where the next search starts */
250 +       spinlock_t      lock;           /* taken around alloc_map/alloc_pos/num_pages updates */
251 +       struct device   *dev;           /* device the coherent region was allocated for */
252 +       unsigned long   num_pages;      /* pages currently handed out */
253 +#ifdef STATS
254 +       size_t          max_size;       /* largest size ever passed to bounce_alloc() */
255 +       unsigned long   num_bufs;       /* buffers currently handed out */
256 +       unsigned long   max_bufs;       /* high-water mark of num_bufs */
257 +       unsigned long   max_pages;      /* high-water mark of num_pages */
258 +#endif
259 +};
260 +/* Bounce state for the single supported client; see g_dmabounce_device_info. */
261 +struct dmabounce_device_info {
262 +       struct device *dev;     /* device passed to brcm_pcie_bounce_init() */
263 +       dma_addr_t threshold;   /* mappings ending above this address are bounced */
264 +       struct list_head safe_buffers;  /* live struct safe_buffer entries */
265 +       struct dmabounce_pool pool;     /* backing store for the bounce buffers */
266 +       rwlock_t lock;          /* guards the safe_buffers list */
267 +#ifdef STATS
268 +       unsigned long map_count;        /* bounced dmabounce_map_page() calls */
269 +       unsigned long unmap_count;      /* bounced dmabounce_unmap_page() calls */
270 +       unsigned long sync_dev_count;   /* bounced sync_for_device calls */
271 +       unsigned long sync_cpu_count;   /* bounced sync_for_cpu calls */
272 +       unsigned long fail_count;       /* alloc_safe_buffer() failures in map_page */
273 +       int attr_res;           /* device_create_file() result, checked at uninit */
274 +#endif
275 +};
276 +
277 +static struct dmabounce_device_info *g_dmabounce_device_info;
278 +
279 +extern int bcm2838_dma40_memcpy_init(void);
280 +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
281 +
282 +#ifdef STATS
283 +/* sysfs show for dmabounce_stats: m:map/unmap s:sync_dev/sync_cpu f:fail s:max b:bufs a:bytes */
284 +static ssize_t
285 +bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
286 +{
287 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
288 +
289 +       if (!device_info)       /* file can be read while no bounce state is published */
290 +               return -ENODEV;
291 +       return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
292 +               device_info->map_count, device_info->unmap_count,
293 +               device_info->sync_dev_count, device_info->sync_cpu_count,
294 +               device_info->fail_count, device_info->pool.max_size,
295 +               device_info->pool.num_bufs, device_info->pool.max_bufs,
296 +               device_info->pool.num_pages * PAGE_SIZE,
297 +               device_info->pool.max_pages * PAGE_SIZE);
298 +}
299 +
300 +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
301 +#endif
302 +/* Set up @pool: page bitmap plus coherent buffer for @buffer_size bytes; 0 or -ENOMEM. */
303 +static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
304 +                        unsigned long buffer_size)
305 +{
306 +       int ret = -ENOMEM;      /* both failure paths below report out-of-memory */
307 +       pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;  /* round up to whole pages */
308 +       pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
309 +       if (!pool->alloc_map)
310 +               goto err_bitmap;
311 +       pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
312 +                                            &pool->dma_addr, GFP_KERNEL);
313 +       if (!pool->virt_addr)
314 +               goto err_dmabuf;
315 +
316 +       pool->alloc_pos = 0;
317 +       spin_lock_init(&pool->lock);
318 +       pool->dev = dev;
319 +       pool->num_pages = 0;    /* nothing handed out yet */
320 +
321 +       DO_STATS(pool->max_size = 0);
322 +       DO_STATS(pool->num_bufs = 0);
323 +       DO_STATS(pool->max_bufs = 0);
324 +       DO_STATS(pool->max_pages = 0);
325 +
326 +       return  0;
327 +
328 +err_dmabuf:
329 +       bitmap_free(pool->alloc_map);   /* undo the bitmap allocation */
330 +err_bitmap:
331 +       return ret;
332 +}
333 +/* Release the coherent buffer and the bitmap that bounce_create() obtained. */
334 +static void bounce_destroy(struct dmabounce_pool *pool)
335 +{
336 +       dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
337 +                         pool->dma_addr);
338 +
339 +       bitmap_free(pool->alloc_map);
340 +}
341 +/* Reserve enough contiguous pool pages for @size; sets *@dmaaddrp, NULL if none are free. */
342 +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
343 +                         dma_addr_t *dmaaddrp)
344 +{
345 +       unsigned long pages;
346 +       unsigned long flags;
347 +       unsigned long pos;
348 +
349 +       pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
350 +
351 +       spin_lock_irqsave(&pool->lock, flags);
352 +       /* All statistics are updated under the pool lock so no update is lost */
353 +       DO_STATS(pool->max_size = max(size, pool->max_size));
354 +       pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
355 +                                        pool->alloc_pos, pages, 0);
356 +       /* If not found, try from the start */
357 +       if (pos >= pool->pages && pool->alloc_pos)
358 +               pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
359 +                                                0, pages, 0);
360 +
361 +       if (pos >= pool->pages) {
362 +               spin_unlock_irqrestore(&pool->lock, flags);
363 +               return NULL;
364 +       }
365 +
366 +       bitmap_set(pool->alloc_map, pos, pages);
367 +       pool->alloc_pos = (pos + pages) % pool->pages;  /* next search starts after us */
368 +       pool->num_pages += pages;
369 +
370 +       DO_STATS(pool->num_bufs++);
371 +       DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
372 +       DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
373 +
374 +       spin_unlock_irqrestore(&pool->lock, flags);
375 +
376 +       *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
377 +
378 +       return pool->virt_addr + pos * PAGE_SIZE;
379 +}
380 +/* Return the pool pages backing @buf; @size must be the size given to bounce_alloc(). */
381 +static void
382 +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
383 +{
384 +       unsigned long pages;
385 +       unsigned long flags;
386 +       unsigned long pos;
387 +
388 +       pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
389 +       pos = (buf - pool->virt_addr)/PAGE_SIZE;        /* page index within the pool */
390 +
391 +       BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));      /* must be page aligned */
392 +
393 +       spin_lock_irqsave(&pool->lock, flags);
394 +       bitmap_clear(pool->alloc_map, pos, pages);
395 +       pool->num_pages -= pages;
396 +       if (pool->num_pages == 0)       /* pool is empty: restart searches at page 0 */
397 +               pool->alloc_pos = 0;
398 +       DO_STATS(pool->num_bufs--);
399 +       spin_unlock_irqrestore(&pool->lock, flags);
400 +}
401 +
402 +/* allocate a 'safe' buffer of @size from the pool and track it; NULL on failure */
403 +static struct safe_buffer *
404 +alloc_safe_buffer(struct dmabounce_device_info *device_info,
405 +                 dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
406 +{
407 +       struct safe_buffer *buf;
408 +       struct dmabounce_pool *pool = &device_info->pool;
409 +       struct device *dev = device_info->dev;
410 +       unsigned long flags;
411 +
412 +       /*
413 +        * Although one might expect this to be called in thread context,
414 +        * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
415 +        * was previously used to select the appropriate allocation mode,
416 +        * but this is unsafe.
417 +        */
418 +       buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
419 +       if (!buf) {
420 +               dev_warn(dev, "%s: kmalloc failed\n", __func__);
421 +               return NULL;
422 +       }
423 +
424 +       buf->unsafe_dma_addr = dma_addr;
425 +       buf->size = size;
426 +       buf->direction = dir;
427 +       buf->pool = pool;
428 +
429 +       buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
430 +
431 +       if (!buf->safe) {
432 +               dev_warn(dev,   /* size is a size_t, so it is printed with %zu */
433 +                        "%s: could not alloc dma memory (size=%zu)\n",
434 +                        __func__, size);
435 +               kfree(buf);
436 +               return NULL;
437 +       }
438 +
439 +       write_lock_irqsave(&device_info->lock, flags);
440 +       list_add(&buf->node, &device_info->safe_buffers);
441 +       write_unlock_irqrestore(&device_info->lock, flags);
442 +
443 +       return buf;
444 +}
445 +
446 +/* return the tracked buffer whose safe DMA range contains safe_dma_addr, or NULL */
447 +static struct safe_buffer *
448 +find_safe_buffer(struct dmabounce_device_info *device_info,
449 +                dma_addr_t safe_dma_addr)
450 +{
451 +       struct safe_buffer *b, *rb = NULL;
452 +       unsigned long flags;
453 +
454 +       read_lock_irqsave(&device_info->lock, flags);
455 +
456 +       list_for_each_entry(b, &device_info->safe_buffers, node)
457 +               if (b->safe_dma_addr <= safe_dma_addr &&
458 +                   b->safe_dma_addr + b->size > safe_dma_addr) {
459 +                       rb = b;
460 +                       break;
461 +               }
462 +
463 +       read_unlock_irqrestore(&device_info->lock, flags);
464 +       return rb;      /* handed back after the read lock has been dropped */
465 +}
466 +/* Unlink @buf from the tracking list, give its pages back to the pool and free it. */
467 +static void
468 +free_safe_buffer(struct dmabounce_device_info *device_info,
469 +                struct safe_buffer *buf)
470 +{
471 +       unsigned long flags;
472 +
473 +       write_lock_irqsave(&device_info->lock, flags);
474 +       list_del(&buf->node);
475 +       write_unlock_irqrestore(&device_info->lock, flags);
476 +
477 +       bounce_free(buf->pool, buf->safe, buf->size);
478 +
479 +       kfree(buf);
480 +}
481 +
482 +/* ************************************************** */
483 +/* Checked lookup of @dma_addr in the global bounce state; @where names the caller in the log. */
484 +static struct safe_buffer *
485 +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
486 +{
487 +       if (!dev || !g_dmabounce_device_info)   /* nothing registered: never bounced */
488 +               return NULL;
489 +       if (dma_mapping_error(dev, dma_addr)) {
490 +               dev_err(dev, "Trying to %s invalid mapping\n", where);
491 +               return NULL;
492 +       }
493 +       return find_safe_buffer(g_dmabounce_device_info, dma_addr);
494 +}
495 +/* Copy unsafe -> safe when @dir sends data to the device; returns the safe DMA address. */
496 +static dma_addr_t
497 +map_single(struct device *dev, struct safe_buffer *buf, size_t size,
498 +          enum dma_data_direction dir, unsigned long attrs)
499 +{
500 +       BUG_ON(buf->size != size);      /* must match what alloc_safe_buffer() recorded */
501 +       BUG_ON(buf->direction != dir);
502 +
503 +       dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
504 +               (u64)buf->safe_dma_addr);
505 +
506 +       if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
507 +           !(attrs & DMA_ATTR_SKIP_CPU_SYNC))  /* caller may opt out of the copy */
508 +               bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
509 +                                    size);
510 +
511 +       return buf->safe_dma_addr;
512 +}
513 +/* Copy safe -> unsafe when @dir brings data from the device; returns the unsafe DMA address. */
514 +static dma_addr_t
515 +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
516 +            enum dma_data_direction dir, unsigned long attrs)
517 +{
518 +       BUG_ON(buf->size != size);      /* must match what alloc_safe_buffer() recorded */
519 +       BUG_ON(buf->direction != dir);
520 +
521 +       if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
522 +           !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {        /* caller may opt out of the copy */
523 +               dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
524 +                       (u64)buf->unsafe_dma_addr);
525 +
526 +               bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
527 +                                    size);
528 +       }
529 +       return buf->unsafe_dma_addr;
530 +}
531 +
532 +/* ************************************************** */
533 +
534 +/*
535 + * map_page: compute the DMA address of page + offset and sync it for the
536 + * device.  If bounce state exists and the mapping ends above the
537 + * threshold, allocate a 'safe' pool buffer, copy into it as map_single()
538 + * dictates and return the safe address instead; all-ones if that fails.
539 + */
540 +static dma_addr_t
541 +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
542 +                  size_t size, enum dma_data_direction dir,
543 +                  unsigned long attrs)
544 +{
545 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
546 +       dma_addr_t dma_addr;
547 +
548 +       dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
549 +
550 +       swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
551 +        if (!is_device_dma_coherent(dev))
552 +               __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
553 +
554 +       if (device_info && (dma_addr + size) > device_info->threshold) {
555 +               struct safe_buffer *buf;
556 +
557 +               buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
558 +               if (!buf) {
559 +                       DO_STATS(device_info->fail_count++);
560 +                       return (~(dma_addr_t)0x0);      /* all-ones value reports the failure */
561 +               }
562 +
563 +               DO_STATS(device_info->map_count++);
564 +
565 +               dma_addr = map_single(dev, buf, size, dir, attrs);      /* now the safe address */
566 +       }
567 +       return dma_addr;
568 +}
569 +
570 +/*
571 + * unmap_page: if dma_addr belongs to a tracked bounce buffer, copy safe ->
572 + * unsafe as unmap_single() dictates, free the bounce buffer and continue
573 + * with the original (unsafe) address; then run the cache/swiotlb sync for
574 + * the CPU on whichever address results.
575 + */
576 +static void
577 +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
578 +                    enum dma_data_direction dir, unsigned long attrs)
579 +{
580 +       struct safe_buffer *buf;
581 +
582 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
583 +       if (buf) {
584 +               DO_STATS(g_dmabounce_device_info->unmap_count++);
585 +               dma_addr = unmap_single(dev, buf, size, dir, attrs);    /* now the unsafe address */
586 +               free_safe_buffer(g_dmabounce_device_info, buf);
587 +       }
588 +
589 +        if (!is_device_dma_coherent(dev))
590 +               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
591 +       swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
592 +}
593 +
594 +/*
595 + * sync_single_for_device: run the same swiotlb/cache sync as dmabounce_map_page()
596 + * and, if dma_addr lies in a tracked bounce buffer, redo the map_single() copy.
597 + */
598 +static void
599 +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
600 +                         enum dma_data_direction dir)
601 +{
602 +       struct safe_buffer *buf;
603 +
604 +        swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
605 +        if (!is_device_dma_coherent(dev))
606 +                __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
607 +
608 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
609 +       if (buf) {
610 +               DO_STATS(g_dmabounce_device_info->sync_dev_count++);
611 +               map_single(dev, buf, size, dir, 0);     /* attrs 0: never skips the copy */
612 +       }
613 +}
614 +
615 +/*
616 + * sync_single_for_cpu: like dmabounce_unmap_page() but the bounce buffer is
617 + * kept; the unmap_single() copy is redone and the CPU-side sync is run.
618 + */
619 +static void
620 +dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
621 +                      size_t size, enum dma_data_direction dir)
622 +{
623 +       struct safe_buffer *buf;
624 +
625 +       buf = find_safe_buffer_dev(dev, dma_addr, __func__);
626 +       if (buf) {
627 +               DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
628 +               dma_addr = unmap_single(dev, buf, size, dir, 0);        /* now the unsafe address */
629 +       }
630 +
631 +        if (!is_device_dma_coherent(dev))
632 +                __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
633 +        swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
634 +}
635 +/* Returns 0 whenever bounce state is registered; else defers to swiotlb_dma_supported(). */
636 +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
637 +{
638 +       if (g_dmabounce_device_info)
639 +               return 0;
640 +
641 +       return swiotlb_dma_supported(dev, dma_mask);
642 +}
643 +
644 +static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
645 +{
646 +       return swiotlb_dma_mapping_error(dev, dma_addr);
647 +}
648 +/* Ops installed by brcm_pcie_bounce_register_dev(); dmabounce_* entries are defined in this file. */
649 +static const struct dma_map_ops dmabounce_ops = {
650 +       .alloc                  = arm64_dma_alloc,
651 +       .free                   = arm64_dma_free,
652 +       .mmap                   = arm64_dma_mmap,
653 +       .get_sgtable            = arm64_dma_get_sgtable,
654 +       .map_page               = dmabounce_map_page,
655 +       .unmap_page             = dmabounce_unmap_page,
656 +       .sync_single_for_cpu    = dmabounce_sync_for_cpu,
657 +       .sync_single_for_device = dmabounce_sync_for_device,
658 +       .map_sg                 = arm64_dma_map_sg,
659 +       .unmap_sg               = arm64_dma_unmap_sg,
660 +       .sync_sg_for_cpu        = arm64_dma_sync_sg_for_cpu,
661 +       .sync_sg_for_device     = arm64_dma_sync_sg_for_device,
662 +       .dma_supported          = dmabounce_dma_supported,
663 +       .mapping_error          = dmabounce_mapping_error,
664 +};
665 +/* Create the one global bounce pool (@buffer_size bytes, bouncing above @threshold); 0 or -errno. */
666 +int brcm_pcie_bounce_init(struct device *dev,
667 +                         unsigned long buffer_size,
668 +                         dma_addr_t threshold)
669 +{
670 +       struct dmabounce_device_info *device_info;
671 +       int ret;
672 +
673 +       /* Only support a single client */
674 +       if (g_dmabounce_device_info)
675 +               return -EBUSY;
676 +
677 +       ret = bcm2838_dma40_memcpy_init();
678 +       if (ret)
679 +               return ret;
680 +       /* bounce_create() below allocates with GFP_KERNEL, so sleeping is allowed here */
681 +       device_info = kmalloc(sizeof(*device_info), GFP_KERNEL);
682 +       if (!device_info) {
683 +               dev_err(dev,
684 +                       "Could not allocate dmabounce_device_info\n");
685 +               return -ENOMEM;
686 +       }
687 +
688 +       ret = bounce_create(&device_info->pool, dev, buffer_size);
689 +       if (ret) {
690 +               dev_err(dev,
691 +                       "dmabounce: could not allocate %lu byte DMA pool\n",
692 +                       buffer_size);
693 +               goto err_bounce;
694 +       }
695 +
696 +       device_info->dev = dev;
697 +       device_info->threshold = threshold;
698 +       INIT_LIST_HEAD(&device_info->safe_buffers);
699 +       rwlock_init(&device_info->lock);
700 +
701 +       DO_STATS(device_info->map_count = 0);
702 +       DO_STATS(device_info->unmap_count = 0);
703 +       DO_STATS(device_info->sync_dev_count = 0);
704 +       DO_STATS(device_info->sync_cpu_count = 0);
705 +       DO_STATS(device_info->fail_count = 0);
706 +       /* Publish the state before the sysfs file, whose show routine reads it */
707 +       g_dmabounce_device_info = device_info;
708 +
709 +       DO_STATS(device_info->attr_res =
710 +                device_create_file(dev, &dev_attr_dmabounce_stats));
711 +       dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
712 +                buffer_size / 1024, &threshold);
713 +
714 +       return 0;
715 +
716 + err_bounce:
717 +       kfree(device_info);
718 +       return ret;
719 +}
720 +EXPORT_SYMBOL(brcm_pcie_bounce_init);
721 +/* Tear down the global bounce state; BUGs if bounce buffers are still outstanding. */
722 +void brcm_pcie_bounce_uninit(struct device *dev)
723 +{
724 +       struct dmabounce_device_info *device_info = g_dmabounce_device_info;
725 +
726 +       g_dmabounce_device_info = NULL; /* stop new lookups before tearing down */
727 +
728 +       if (!device_info) {
729 +               dev_warn(dev,
730 +                        "Never registered with dmabounce but attempting "
731 +                        "to unregister!\n");
732 +               return;
733 +       }
734 +
735 +       if (!list_empty(&device_info->safe_buffers)) {
736 +               dev_err(dev,
737 +                       "Removing from dmabounce with pending buffers!\n");
738 +               BUG();
739 +       }
740 +
741 +       bounce_destroy(&device_info->pool);
742 +
743 +       DO_STATS(if (device_info->attr_res == 0)
744 +                        device_remove_file(dev, &dev_attr_dmabounce_stats));
745 +
746 +       kfree(device_info);
747 +}
748 +EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
749 +/* Install dmabounce_ops as @dev's DMA operations; always returns 0. */
750 +int brcm_pcie_bounce_register_dev(struct device *dev)
751 +{
752 +       set_dma_ops(dev, &dmabounce_ops);
753 +
754 +       return 0;
755 +}
756 +EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
757 +
758 +MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
759 +MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
760 +MODULE_LICENSE("GPL");
761 --- a/drivers/pci/controller/pcie-brcmstb.c
762 +++ b/drivers/pci/controller/pcie-brcmstb.c
763 @@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma
764  
765  static void brcm_set_dma_ops(struct device *dev)
766  {
767 -       int ret;
768 -
769 -       if (IS_ENABLED(CONFIG_ARM64)) {
770 -               /*
771 -                * We are going to invoke get_dma_ops().  That
772 -                * function, at this point in time, invokes
773 -                * get_arch_dma_ops(), and for ARM64 that function
774 -                * returns a pointer to dummy_dma_ops.  So then we'd
775 -                * like to call arch_setup_dma_ops(), but that isn't
776 -                * exported.  Instead, we call of_dma_configure(),
777 -                * which is exported, and this calls
778 -                * arch_setup_dma_ops().  Once we do this the call to
779 -                * get_dma_ops() will work properly because
780 -                * dev->dma_ops will be set.
781 -                */
782 -               ret = of_dma_configure(dev, dev->of_node, true);
783 -               if (ret) {
784 -                       dev_err(dev, "of_dma_configure() failed: %d\n", ret);
785 -                       return;
786 -               }
787 -       }
788 -
789         arch_dma_ops = get_dma_ops(dev);
790         if (!arch_dma_ops) {
791                 dev_err(dev, "failed to get arch_dma_ops\n");
792 @@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(str
793         extern unsigned long max_pfn;
794         struct device *dev = __dev;
795         const char *rc_name = "0000:00:00.0";
796 +       int ret;
797  
798         switch (event) {
799         case BUS_NOTIFY_ADD_DEVICE:
800                 if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
801                     strcmp(dev->kobj.name, rc_name)) {
802 -                       int ret;
803  
804                         ret = brcm_pcie_bounce_register_dev(dev);
805                         if (ret) {
806 @@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(str
807                                         ret);
808                                 return ret;
809                         }
810 +               } else if (IS_ENABLED(CONFIG_ARM64)) {
811 +                       ret = of_dma_configure(dev, dev->of_node, true);
812 +                       if (ret) {
813 +                               dev_err(dev, "of_dma_configure() failed: %d\n", ret);
814 +                               return ret;     /* int function: propagate like the branch above */
815 +                       }
816                 }
817                 brcm_set_dma_ops(dev);
818                 return NOTIFY_OK;