1 From 60f3db31d4cb785befed715b80c430f60f647701 Mon Sep 17 00:00:00 2001
2 From: yaroslavros <yaroslavros@gmail.com>
3 Date: Wed, 14 Aug 2019 15:22:55 +0100
4 Subject: [PATCH] Ported pcie-brcmstb bounce buffer implementation to ARM64
7 Ported pcie-brcmstb bounce buffer implementation to ARM64.
8 This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
10 Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
12 arch/arm64/mm/dma-mapping.c | 29 +
13 drivers/pci/controller/Makefile | 3 +
14 drivers/pci/controller/pcie-brcmstb-bounce.h | 2 +-
15 .../pci/controller/pcie-brcmstb-bounce64.c | 569 ++++++++++++++++++
16 drivers/pci/controller/pcie-brcmstb.c | 32 +-
17 5 files changed, 610 insertions(+), 25 deletions(-)
18 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
20 --- a/arch/arm64/mm/dma-mapping.c
21 +++ b/arch/arm64/mm/dma-mapping.c
22 @@ -31,6 +31,35 @@ void arch_dma_prep_coherent(struct page
25 #ifdef CONFIG_IOMMU_DMA
26 +static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
27 + struct page *page, size_t size)
29 + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
32 + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
37 +static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
38 + unsigned long pfn, size_t size)
41 + unsigned long nr_vma_pages = vma_pages(vma);
42 + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
43 + unsigned long off = vma->vm_pgoff;
45 + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
46 + ret = remap_pfn_range(vma, vma->vm_start,
48 + vma->vm_end - vma->vm_start,
55 void arch_teardown_dma_ops(struct device *dev)
58 --- a/drivers/pci/controller/Makefile
59 +++ b/drivers/pci/controller/Makefile
60 @@ -33,6 +33,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
62 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
65 +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
68 obj-$(CONFIG_VMD) += vmd.o
69 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
70 --- a/drivers/pci/controller/pcie-brcmstb-bounce.h
71 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
73 #ifndef _PCIE_BRCMSTB_BOUNCE_H
74 #define _PCIE_BRCMSTB_BOUNCE_H
77 +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
79 int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
80 dma_addr_t threshold);
82 +++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
85 + * This code started out as a version of arch/arm/common/dmabounce.c,
86 + * modified to cope with highmem pages. Now it has been changed heavily -
87 + * it now preallocates a large block (currently 4MB) and carves it up
88 + * sequentially in ring fashion, and DMA is used to copy the data - to the
89 + * point where very little of the original remains.
91 + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
93 + * Original version by Brad Parker (brad@heeltoe.com)
94 + * Re-written by Christopher Hoover <ch@murgatroid.com>
95 + * Made generic by Deepak Saxena <dsaxena@plexity.net>
97 + * Copyright (C) 2002 Hewlett Packard Company.
98 + * Copyright (C) 2004 MontaVista Software, Inc.
100 + * This program is free software; you can redistribute it and/or
101 + * modify it under the terms of the GNU General Public License
102 + * version 2 as published by the Free Software Foundation.
105 +#include <linux/module.h>
106 +#include <linux/init.h>
107 +#include <linux/slab.h>
108 +#include <linux/page-flags.h>
109 +#include <linux/device.h>
110 +#include <linux/dma-mapping.h>
111 +#include <linux/dma-direct.h>
112 +#include <linux/dma-noncoherent.h>
113 +#include <linux/dmapool.h>
114 +#include <linux/list.h>
115 +#include <linux/scatterlist.h>
116 +#include <linux/bitmap.h>
117 +#include <linux/swiotlb.h>
119 +#include <asm/cacheflush.h>
124 +#define DO_STATS(X) do { X ; } while (0)
126 +#define DO_STATS(X) do { } while (0)
129 +/* ************************************************** */
131 +struct safe_buffer {
132 + struct list_head node;
134 + /* original request */
138 + struct dmabounce_pool *pool;
140 + dma_addr_t unsafe_dma_addr;
141 + dma_addr_t safe_dma_addr;
144 +struct dmabounce_pool {
145 + unsigned long pages;
147 + dma_addr_t dma_addr;
148 + unsigned long *alloc_map;
149 + unsigned long alloc_pos;
151 + struct device *dev;
152 + unsigned long num_pages;
155 + unsigned long num_bufs;
156 + unsigned long max_bufs;
157 + unsigned long max_pages;
161 +struct dmabounce_device_info {
162 + struct device *dev;
163 + dma_addr_t threshold;
164 + struct list_head safe_buffers;
165 + struct dmabounce_pool pool;
168 + unsigned long map_count;
169 + unsigned long unmap_count;
170 + unsigned long sync_dev_count;
171 + unsigned long sync_cpu_count;
172 + unsigned long fail_count;
177 +static struct dmabounce_device_info *g_dmabounce_device_info;
179 +extern int bcm2838_dma40_memcpy_init(void);
180 +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
184 +bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
186 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
187 + return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
188 + device_info->map_count,
189 + device_info->unmap_count,
190 + device_info->sync_dev_count,
191 + device_info->sync_cpu_count,
192 + device_info->fail_count,
193 + device_info->pool.max_size,
194 + device_info->pool.num_bufs,
195 + device_info->pool.max_bufs,
196 + device_info->pool.num_pages * PAGE_SIZE,
197 + device_info->pool.max_pages * PAGE_SIZE);
200 +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
203 +static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
204 + unsigned long buffer_size)
207 + pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
208 + pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
209 + if (!pool->alloc_map)
211 + pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
212 + &pool->dma_addr, GFP_KERNEL);
213 + if (!pool->virt_addr)
216 + pool->alloc_pos = 0;
217 + spin_lock_init(&pool->lock);
219 + pool->num_pages = 0;
221 + DO_STATS(pool->max_size = 0);
222 + DO_STATS(pool->num_bufs = 0);
223 + DO_STATS(pool->max_bufs = 0);
224 + DO_STATS(pool->max_pages = 0);
229 + bitmap_free(pool->alloc_map);
234 +static void bounce_destroy(struct dmabounce_pool *pool)
236 + dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
239 + bitmap_free(pool->alloc_map);
242 +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
243 + dma_addr_t *dmaaddrp)
245 + unsigned long pages;
246 + unsigned long flags;
249 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
251 + DO_STATS(pool->max_size = max(size, pool->max_size));
253 + spin_lock_irqsave(&pool->lock, flags);
254 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
255 + pool->alloc_pos, pages, 0);
256 + /* If not found, try from the start */
257 + if (pos >= pool->pages && pool->alloc_pos)
258 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
261 + if (pos >= pool->pages) {
262 + spin_unlock_irqrestore(&pool->lock, flags);
266 + bitmap_set(pool->alloc_map, pos, pages);
267 + pool->alloc_pos = (pos + pages) % pool->pages;
268 + pool->num_pages += pages;
270 + DO_STATS(pool->num_bufs++);
271 + DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
272 + DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
274 + spin_unlock_irqrestore(&pool->lock, flags);
276 + *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
278 + return pool->virt_addr + pos * PAGE_SIZE;
282 +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
284 + unsigned long pages;
285 + unsigned long flags;
288 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
289 + pos = (buf - pool->virt_addr)/PAGE_SIZE;
291 + BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
293 + spin_lock_irqsave(&pool->lock, flags);
294 + bitmap_clear(pool->alloc_map, pos, pages);
295 + pool->num_pages -= pages;
296 + if (pool->num_pages == 0)
297 + pool->alloc_pos = 0;
298 + DO_STATS(pool->num_bufs--);
299 + spin_unlock_irqrestore(&pool->lock, flags);
302 +/* allocate a 'safe' buffer and keep track of it */
303 +static struct safe_buffer *
304 +alloc_safe_buffer(struct dmabounce_device_info *device_info,
305 + dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
307 + struct safe_buffer *buf;
308 + struct dmabounce_pool *pool = &device_info->pool;
309 + struct device *dev = device_info->dev;
310 + unsigned long flags;
313 + * Although one might expect this to be called in thread context,
314 + * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
315 + * was previously used to select the appropriate allocation mode,
316 + * but this is unsafe.
318 + buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
320 + dev_warn(dev, "%s: kmalloc failed\n", __func__);
324 + buf->unsafe_dma_addr = dma_addr;
326 + buf->direction = dir;
329 + buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
333 + "%s: could not alloc dma memory (size=%zu)\n",
339 + write_lock_irqsave(&device_info->lock, flags);
340 + list_add(&buf->node, &device_info->safe_buffers);
341 + write_unlock_irqrestore(&device_info->lock, flags);
346 +/* determine if a buffer is from our "safe" pool */
347 +static struct safe_buffer *
348 +find_safe_buffer(struct dmabounce_device_info *device_info,
349 + dma_addr_t safe_dma_addr)
351 + struct safe_buffer *b, *rb = NULL;
352 + unsigned long flags;
354 + read_lock_irqsave(&device_info->lock, flags);
356 + list_for_each_entry(b, &device_info->safe_buffers, node)
357 + if (b->safe_dma_addr <= safe_dma_addr &&
358 + b->safe_dma_addr + b->size > safe_dma_addr) {
363 + read_unlock_irqrestore(&device_info->lock, flags);
368 +free_safe_buffer(struct dmabounce_device_info *device_info,
369 + struct safe_buffer *buf)
371 + unsigned long flags;
373 + write_lock_irqsave(&device_info->lock, flags);
374 + list_del(&buf->node);
375 + write_unlock_irqrestore(&device_info->lock, flags);
377 + bounce_free(buf->pool, buf->safe, buf->size);
382 +/* ************************************************** */
384 +static struct safe_buffer *
385 +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
387 + if (!dev || !g_dmabounce_device_info)
389 + if (dma_mapping_error(dev, dma_addr)) {
390 + dev_err(dev, "Trying to %s invalid mapping\n", where);
393 + return find_safe_buffer(g_dmabounce_device_info, dma_addr);
397 +map_single(struct device *dev, struct safe_buffer *buf, size_t size,
398 + enum dma_data_direction dir, unsigned long attrs)
400 + BUG_ON(buf->size != size);
401 + BUG_ON(buf->direction != dir);
403 + dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
404 + (u64)buf->safe_dma_addr);
406 + if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
407 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
408 + bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
411 + return buf->safe_dma_addr;
415 +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
416 + enum dma_data_direction dir, unsigned long attrs)
418 + BUG_ON(buf->size != size);
419 + BUG_ON(buf->direction != dir);
421 + if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
422 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
423 + dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
424 + (u64)buf->unsafe_dma_addr);
426 + bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
429 + return buf->unsafe_dma_addr;
432 +/* ************************************************** */
435 + * See if a buffer address is in an 'unsafe' range. If it is,
436 + * allocate a 'safe' buffer and copy the unsafe buffer into it, then
437 + * substitute the safe buffer for the unsafe one.
438 + * (Basically, move the buffer from an unsafe area to a safe one.)
441 +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
442 + size_t size, enum dma_data_direction dir,
443 + unsigned long attrs)
445 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
446 + dma_addr_t dma_addr;
448 + dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
450 + dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
451 + if (!dev_is_dma_coherent(dev))
452 + __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
454 + if (device_info && (dma_addr + size) > device_info->threshold) {
455 + struct safe_buffer *buf;
457 + buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
459 + DO_STATS(device_info->fail_count++);
460 + return (~(dma_addr_t)0x0);
463 + DO_STATS(device_info->map_count++);
465 + dma_addr = map_single(dev, buf, size, dir, attrs);
471 + * See if a mapped address was really a "safe" buffer and, if so, copy
472 + * the data from the safe buffer back to the unsafe buffer and free up
473 + * the safe buffer. (Basically, return things to the way they were.)
477 +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
478 + enum dma_data_direction dir, unsigned long attrs)
480 + struct safe_buffer *buf;
482 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
484 + DO_STATS(g_dmabounce_device_info->unmap_count++);
485 + dma_addr = unmap_single(dev, buf, size, dir, attrs);
486 + free_safe_buffer(g_dmabounce_device_info, buf);
489 + if (!dev_is_dma_coherent(dev))
490 + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
491 + dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
495 + * A version of dmabounce_map_page that assumes the mapping has already
496 + * been created - intended for streaming operation.
499 +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
500 + enum dma_data_direction dir)
502 + struct safe_buffer *buf;
504 + dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
505 + if (!dev_is_dma_coherent(dev))
506 + __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
508 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
510 + DO_STATS(g_dmabounce_device_info->sync_dev_count++);
511 + map_single(dev, buf, size, dir, 0);
516 + * A version of dmabounce_unmap_page that doesn't destroy the mapping -
517 + * intended for streaming operation.
520 +dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
521 + size_t size, enum dma_data_direction dir)
523 + struct safe_buffer *buf;
525 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
527 + DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
528 + dma_addr = unmap_single(dev, buf, size, dir, 0);
531 + if (!dev_is_dma_coherent(dev))
532 + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
533 + dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
536 +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
538 + if (g_dmabounce_device_info)
541 + return dma_direct_supported(dev, dma_mask);
544 +static const struct dma_map_ops dmabounce_ops = {
545 + .alloc = dma_direct_alloc,
546 + .free = dma_direct_free,
547 + .map_page = dmabounce_map_page,
548 + .unmap_page = dmabounce_unmap_page,
549 + .sync_single_for_cpu = dmabounce_sync_for_cpu,
550 + .sync_single_for_device = dmabounce_sync_for_device,
551 + .map_sg = dma_direct_map_sg,
552 + .unmap_sg = dma_direct_unmap_sg,
553 + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
554 + .sync_sg_for_device = dma_direct_sync_sg_for_device,
555 + .dma_supported = dmabounce_dma_supported,
558 +int brcm_pcie_bounce_init(struct device *dev,
559 + unsigned long buffer_size,
560 + dma_addr_t threshold)
562 + struct dmabounce_device_info *device_info;
565 + /* Only support a single client */
566 + if (g_dmabounce_device_info)
569 + ret = bcm2838_dma40_memcpy_init();
573 + device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
574 + if (!device_info) {
576 + "Could not allocate dmabounce_device_info\n");
580 + ret = bounce_create(&device_info->pool, dev, buffer_size);
583 + "dmabounce: could not allocate %ld byte DMA pool\n",
588 + device_info->dev = dev;
589 + device_info->threshold = threshold;
590 + INIT_LIST_HEAD(&device_info->safe_buffers);
591 + rwlock_init(&device_info->lock);
593 + DO_STATS(device_info->map_count = 0);
594 + DO_STATS(device_info->unmap_count = 0);
595 + DO_STATS(device_info->sync_dev_count = 0);
596 + DO_STATS(device_info->sync_cpu_count = 0);
597 + DO_STATS(device_info->fail_count = 0);
598 + DO_STATS(device_info->attr_res =
599 + device_create_file(dev, &dev_attr_dmabounce_stats));
601 + g_dmabounce_device_info = device_info;
603 + dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
604 + buffer_size / 1024, &threshold);
609 + kfree(device_info);
612 +EXPORT_SYMBOL(brcm_pcie_bounce_init);
614 +void brcm_pcie_bounce_uninit(struct device *dev)
616 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
618 + g_dmabounce_device_info = NULL;
620 + if (!device_info) {
622 + "Never registered with dmabounce but attempting "
623 + "to unregister!\n");
627 + if (!list_empty(&device_info->safe_buffers)) {
629 + "Removing from dmabounce with pending buffers!\n");
633 + bounce_destroy(&device_info->pool);
635 + DO_STATS(if (device_info->attr_res == 0)
636 + device_remove_file(dev, &dev_attr_dmabounce_stats));
638 + kfree(device_info);
640 +EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
642 +int brcm_pcie_bounce_register_dev(struct device *dev)
644 + set_dma_ops(dev, &dmabounce_ops);
648 +EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
650 +MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
651 +MODULE_DESCRIPTION("Dedicated DMA bounce support for pcie-brcmstb");
652 +MODULE_LICENSE("GPL");
653 --- a/drivers/pci/controller/pcie-brcmstb.c
654 +++ b/drivers/pci/controller/pcie-brcmstb.c
655 @@ -611,28 +611,6 @@ static const struct dma_map_ops brcm_dma
657 static void brcm_set_dma_ops(struct device *dev)
661 - if (IS_ENABLED(CONFIG_ARM64)) {
663 - * We are going to invoke get_dma_ops(). That
664 - * function, at this point in time, invokes
665 - * get_arch_dma_ops(), and for ARM64 that function
666 - * returns a pointer to dummy_dma_ops. So then we'd
667 - * like to call arch_setup_dma_ops(), but that isn't
668 - * exported. Instead, we call of_dma_configure(),
669 - * which is exported, and this calls
670 - * arch_setup_dma_ops(). Once we do this the call to
671 - * get_dma_ops() will work properly because
672 - * dev->dma_ops will be set.
674 - ret = of_dma_configure(dev, dev->of_node, true);
676 - dev_err(dev, "of_dma_configure() failed: %d\n", ret);
681 arch_dma_ops = get_dma_ops(dev);
683 dev_err(dev, "failed to get arch_dma_ops\n");
684 @@ -651,12 +629,12 @@ static int brcmstb_platform_notifier(str
685 extern unsigned long max_pfn;
686 struct device *dev = __dev;
687 const char *rc_name = "0000:00:00.0";
691 case BUS_NOTIFY_ADD_DEVICE:
692 if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
693 strcmp(dev->kobj.name, rc_name)) {
696 ret = brcm_pcie_bounce_register_dev(dev);
698 @@ -665,8 +643,14 @@ static int brcmstb_platform_notifier(str
702 - brcm_set_dma_ops(dev);
703 + } else if (IS_ENABLED(CONFIG_ARM64)) {
704 + ret = of_dma_configure(dev, dev->of_node, true);
706 + dev_err(dev, "of_dma_configure() failed: %d\n", ret);
710 + brcm_set_dma_ops(dev);
713 case BUS_NOTIFY_DEL_DEVICE: