1 From ebd731dd71ec9728a5a87ec1cd695be15828c32c Mon Sep 17 00:00:00 2001
2 From: popcornmix <popcornmix@gmail.com>
3 Date: Mon, 28 Nov 2016 16:50:04 +0000
4 Subject: [PATCH] Improve __copy_to_user and __copy_from_user performance
6 Provide a __copy_from_user that uses memcpy. On BCM2708, use
7 optimised memcpy/memmove/memcmp/memset implementations.
9 arch/arm: Add mmiocpy/set aliases for memcpy/set
11 See: https://github.com/raspberrypi/linux/issues/1082
13 copy_from_user: CPU_SW_DOMAIN_PAN compatibility
15 The downstream copy_from_user acceleration must also play nice with
16 CONFIG_CPU_SW_DOMAIN_PAN.
18 See: https://github.com/raspberrypi/linux/issues/1381
20 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
22 arch/arm/include/asm/string.h | 5 +
23 arch/arm/include/asm/uaccess.h | 3 +
24 arch/arm/lib/Makefile | 15 +-
25 arch/arm/lib/arm-mem.h | 159 ++++++++++++
26 arch/arm/lib/copy_from_user.S | 4 +-
27 arch/arm/lib/exports_rpi.c | 37 +++
28 arch/arm/lib/memcmp_rpi.S | 285 +++++++++++++++++++++
29 arch/arm/lib/memcpy_rpi.S | 61 +++++
30 arch/arm/lib/memcpymove.h | 506 +++++++++++++++++++++++++++++++++++++
31 arch/arm/lib/memmove_rpi.S | 61 +++++
32 arch/arm/lib/memset_rpi.S | 123 +++++++++
33 arch/arm/lib/uaccess_with_memcpy.c | 120 ++++++++-
34 arch/arm/mach-bcm/Kconfig | 7 +
35 13 files changed, 1380 insertions(+), 6 deletions(-)
36 create mode 100644 arch/arm/lib/arm-mem.h
37 create mode 100644 arch/arm/lib/exports_rpi.c
38 create mode 100644 arch/arm/lib/memcmp_rpi.S
39 create mode 100644 arch/arm/lib/memcpy_rpi.S
40 create mode 100644 arch/arm/lib/memcpymove.h
41 create mode 100644 arch/arm/lib/memmove_rpi.S
42 create mode 100644 arch/arm/lib/memset_rpi.S
44 --- a/arch/arm/include/asm/string.h
45 +++ b/arch/arm/include/asm/string.h
46 @@ -24,6 +24,11 @@ extern void * memchr(const void *, int,
47 #define __HAVE_ARCH_MEMSET
48 extern void * memset(void *, int, __kernel_size_t);
50 +#ifdef CONFIG_BCM2835_FAST_MEMCPY
51 +#define __HAVE_ARCH_MEMCMP
52 +extern int memcmp(const void *, const void *, size_t);
55 extern void __memzero(void *ptr, __kernel_size_t n);
57 #define memset(p,v,n) \
58 --- a/arch/arm/include/asm/uaccess.h
59 +++ b/arch/arm/include/asm/uaccess.h
60 @@ -477,6 +477,9 @@ do { \
61 extern unsigned long __must_check
62 arm_copy_from_user(void *to, const void __user *from, unsigned long n);
64 +extern unsigned long __must_check
65 +__copy_from_user_std(void *to, const void __user *from, unsigned long n);
67 static inline unsigned long __must_check
68 __copy_from_user(void *to, const void __user *from, unsigned long n)
70 --- a/arch/arm/lib/Makefile
71 +++ b/arch/arm/lib/Makefile
74 lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
75 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
76 - delay.o delay-loop.o findbit.o memchr.o memcpy.o \
77 - memmove.o memset.o memzero.o setbit.o \
78 - strchr.o strrchr.o \
79 + delay.o delay-loop.o findbit.o memchr.o memzero.o \
80 + setbit.o strchr.o strrchr.o \
81 testchangebit.o testclearbit.o testsetbit.o \
82 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
83 ucmpdi2.o lib1funcs.o div64.o \
84 @@ -18,6 +17,16 @@ lib-y := backtrace.o changebit.o csumip
85 mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
86 copy_from_user.o copy_to_user.o
88 +# Choose optimised implementations for Raspberry Pi
89 +ifeq ($(CONFIG_BCM2835_FAST_MEMCPY),y)
90 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
91 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
92 + obj-$(CONFIG_MODULES) += exports_rpi.o
93 + lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
95 + lib-y += memcpy.o memmove.o memset.o
98 # using lib_ here won't override already available weak symbols
99 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
102 +++ b/arch/arm/lib/arm-mem.h
105 +Copyright (c) 2013, Raspberry Pi Foundation
106 +Copyright (c) 2013, RISC OS Open Ltd
107 +All rights reserved.
109 +Redistribution and use in source and binary forms, with or without
110 +modification, are permitted provided that the following conditions are met:
111 + * Redistributions of source code must retain the above copyright
112 + notice, this list of conditions and the following disclaimer.
113 + * Redistributions in binary form must reproduce the above copyright
114 + notice, this list of conditions and the following disclaimer in the
115 + documentation and/or other materials provided with the distribution.
116 + * Neither the name of the copyright holder nor the
117 + names of its contributors may be used to endorse or promote products
118 + derived from this software without specific prior written permission.
120 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
121 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
122 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
123 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
124 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
125 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
126 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
127 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
128 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
129 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
138 +.macro preload_leading_step1 backwards, ptr, base
139 +/* If the destination is already 16-byte aligned, then we need to preload
140 + * between 0 and prefetch_distance (inclusive) cache lines ahead so there
141 + * are no gaps when the inner loop starts.
150 + .rept prefetch_distance+1
153 + .set OFFSET, OFFSET-32
155 + .set OFFSET, OFFSET+32
160 +.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
161 +/* However, if the destination is not 16-byte aligned, we may need to
162 + * preload one more cache line than that. The question we need to ask is:
163 + * are the leading bytes more than the amount by which the source
164 + * pointer will be rounded down for preloading, and if so, by how many
168 +/* Here we compare against how many bytes we are into the
169 + * cache line, counting down from the highest such address.
170 + * Effectively, we want to calculate
171 + * leading_bytes = dst&15
172 + * cacheline_offset = 31-((src-leading_bytes-1)&31)
173 + * extra_needed = leading_bytes - cacheline_offset
174 + * and test if extra_needed is <= 0, or rearranging:
175 + * leading_bytes + ((src-leading_bytes-1)&31) <= 31
177 + mov tmp, base, lsl #32-5
178 + sbc tmp, tmp, leading_bytes, lsl #32-5
179 + adds tmp, tmp, leading_bytes, lsl #32-5
181 + pld [ptr, #-32*(prefetch_distance+1)]
183 +/* Effectively, we want to calculate
184 + * leading_bytes = (-dst)&15
185 + * cacheline_offset = (src+leading_bytes)&31
186 + * extra_needed = leading_bytes - cacheline_offset
187 + * and test if extra_needed is <= 0.
189 + mov tmp, base, lsl #32-5
190 + add tmp, tmp, leading_bytes, lsl #32-5
191 + rsbs tmp, tmp, leading_bytes, lsl #32-5
193 + pld [ptr, #32*(prefetch_distance+1)]
198 +.macro preload_trailing backwards, base, remain, tmp
199 + /* We need either 0, 1 or 2 extra preloads */
202 + mov tmp, tmp, lsl #32-5
204 + mov tmp, base, lsl #32-5
206 + adds tmp, tmp, remain, lsl #32-5
207 + adceqs tmp, tmp, #0
208 + /* The instruction above has two effects: it ensures Z is only
209 + * set if C was clear (so Z indicates that both shifted quantities
210 + * were 0), and it clears C if Z was set (so C indicates that the sum
211 + * of the shifted quantities was strictly greater than 32) */
221 + pld [tmp, #-32*(prefetch_distance+1)]
223 + pld [tmp, #-32*prefetch_distance]
225 + pld [tmp, #32*(prefetch_distance+2)]
227 + pld [tmp, #32*(prefetch_distance+1)]
232 +.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
235 + bic tmp0, tmp0, #31
237 + sub tmp1, base, remain, lsl #shift
239 + bic tmp0, base, #31
241 + add tmp1, base, remain, lsl #shift
244 + bic tmp1, tmp1, #31
248 + /* In this case, all the data fits in either 1 or 2 cache lines */
253 + sub tmp0, tmp0, #32
255 + add tmp0, tmp0, #32
263 --- a/arch/arm/lib/copy_from_user.S
264 +++ b/arch/arm/lib/copy_from_user.S
269 -ENTRY(arm_copy_from_user)
270 +ENTRY(__copy_from_user_std)
271 +WEAK(arm_copy_from_user)
273 #include "copy_template.S"
275 ENDPROC(arm_copy_from_user)
276 +ENDPROC(__copy_from_user_std)
278 .pushsection .fixup,"ax"
281 +++ b/arch/arm/lib/exports_rpi.c
284 + * Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
286 + * Redistribution and use in source and binary forms, with or without
287 + * modification, are permitted provided that the following conditions
289 + * 1. Redistributions of source code must retain the above copyright
290 + * notice, this list of conditions, and the following disclaimer,
291 + * without modification.
292 + * 2. Redistributions in binary form must reproduce the above copyright
293 + * notice, this list of conditions and the following disclaimer in the
294 + * documentation and/or other materials provided with the distribution.
295 + * 3. The names of the above-listed copyright holders may not be used
296 + * to endorse or promote products derived from this software without
297 + * specific prior written permission.
299 + * ALTERNATIVELY, this software may be distributed under the terms of the
300 + * GNU General Public License ("GPL") version 2, as published by the Free
301 + * Software Foundation.
303 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
304 + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
305 + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
306 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
307 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
308 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
309 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
310 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
311 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
312 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
313 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
316 +#include <linux/kernel.h>
317 +#include <linux/module.h>
319 +EXPORT_SYMBOL(memcmp);
321 +++ b/arch/arm/lib/memcmp_rpi.S
324 +Copyright (c) 2013, Raspberry Pi Foundation
325 +Copyright (c) 2013, RISC OS Open Ltd
326 +All rights reserved.
328 +Redistribution and use in source and binary forms, with or without
329 +modification, are permitted provided that the following conditions are met:
330 + * Redistributions of source code must retain the above copyright
331 + notice, this list of conditions and the following disclaimer.
332 + * Redistributions in binary form must reproduce the above copyright
333 + notice, this list of conditions and the following disclaimer in the
334 + documentation and/or other materials provided with the distribution.
335 + * Neither the name of the copyright holder nor the
336 + names of its contributors may be used to endorse or promote products
337 + derived from this software without specific prior written permission.
339 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
340 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
341 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
342 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
343 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
344 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
345 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
346 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
347 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
348 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
351 +#include <linux/linkage.h>
352 +#include "arm-mem.h"
354 +/* Prevent the stack from becoming executable */
355 +#if defined(__linux__) && defined(__ELF__)
356 +.section .note.GNU-stack,"",%progbits
366 +.macro memcmp_process_head unaligned
368 + ldr DAT0, [S_1], #4
369 + ldr DAT1, [S_1], #4
370 + ldr DAT2, [S_1], #4
371 + ldr DAT3, [S_1], #4
373 + ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
375 + ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
378 +.macro memcmp_process_tail
386 +.macro memcmp_leading_31bytes
387 + movs DAT0, OFF, lsl #31
388 + ldrmib DAT0, [S_1], #1
389 + ldrcsh DAT1, [S_1], #2
390 + ldrmib DAT4, [S_2], #1
391 + ldrcsh DAT5, [S_2], #2
401 + movs DAT0, OFF, lsl #29
402 + ldrmi DAT0, [S_1], #4
403 + ldrcs DAT1, [S_1], #4
404 + ldrcs DAT2, [S_1], #4
405 + ldrmi DAT4, [S_2], #4
406 + ldmcsia S_2!, {DAT5, DAT6}
421 + memcmp_process_head 1
423 + memcmp_process_tail
427 +.macro memcmp_trailing_15bytes unaligned
430 + ldrcs DAT0, [S_1], #4
431 + ldrcs DAT1, [S_1], #4
433 + ldmcsia S_1!, {DAT0, DAT1}
435 + ldrmi DAT2, [S_1], #4
436 + ldmcsia S_2!, {DAT4, DAT5}
437 + ldrmi DAT6, [S_2], #4
449 + ldrcsh DAT0, [S_1], #2
451 + ldrcsh DAT4, [S_2], #2
462 +.macro memcmp_long_inner_loop unaligned
464 + memcmp_process_head unaligned
465 + pld [S_2, #prefetch_distance*32 + 16]
466 + memcmp_process_tail
467 + memcmp_process_head unaligned
469 + memcmp_process_tail
472 + /* Just before the final (prefetch_distance+1) 32-byte blocks,
473 + * deal with final preloads */
474 + preload_trailing 0, S_1, N, DAT0
475 + preload_trailing 0, S_2, N, DAT0
476 + add N, N, #(prefetch_distance+2)*32 - 16
478 + memcmp_process_head unaligned
479 + memcmp_process_tail
482 + /* Trailing words and bytes */
485 + memcmp_trailing_15bytes unaligned
486 +199: /* Reached end without detecting a difference */
489 + pop {DAT1-DAT6, pc}
492 +.macro memcmp_short_inner_loop unaligned
493 + subs N, N, #16 /* simplifies inner loop termination */
496 + memcmp_process_head unaligned
497 + memcmp_process_tail
500 +122: /* Trailing words and bytes */
503 + memcmp_trailing_15bytes unaligned
504 +199: /* Reached end without detecting a difference */
507 + pop {DAT1-DAT6, pc}
511 + * int memcmp(const void *s1, const void *s2, size_t n);
513 + * a1 = pointer to buffer 1
514 + * a2 = pointer to buffer 2
515 + * a3 = number of bytes to compare (as unsigned chars)
517 + * a1 = >0/=0/<0 if s1 >/=/< s2
520 +.set prefetch_distance, 2
536 + push {DAT1-DAT6, lr}
537 + setend be /* lowest-addressed bytes are most significant */
539 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
540 + cmp N, #(prefetch_distance+3)*32 - 1
544 + /* Adjust N so that the decrement instruction can also test for
545 + * inner loop termination. We want it to stop when there are
546 + * (prefetch_distance+1) complete blocks to go. */
547 + sub N, N, #(prefetch_distance+2)*32
548 + preload_leading_step1 0, DAT0, S_1
549 + preload_leading_step1 0, DAT1, S_2
552 + rsb OFF, S_2, #0 /* no need to AND with 15 here */
553 + preload_leading_step2 0, DAT0, S_1, OFF, DAT2
554 + preload_leading_step2 0, DAT1, S_2, OFF, DAT2
555 + memcmp_leading_31bytes
556 +154: /* Second source now cacheline (32-byte) aligned; we have at
557 + * least one prefetch to go. */
558 + /* Prefetch offset is best selected such that it lies in the
559 + * first 8 of each 32 bytes - but it's just as easy to aim for
562 + rsb OFF, OFF, #32*prefetch_distance
565 + memcmp_long_inner_loop 0
566 +140: memcmp_long_inner_loop 1
568 +170: /* Short case */
571 + preload_all 0, 0, 0, S_1, N, DAT0, DAT1
572 + preload_all 0, 0, 0, S_2, N, DAT0, DAT1
577 + ldrb DAT0, [S_1], #1
578 + ldrb DAT4, [S_2], #1
583 +174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
586 + memcmp_short_inner_loop 0
587 +140: memcmp_short_inner_loop 1
589 +200: /* Difference found: determine sign. */
593 + pop {DAT1-DAT6, pc}
609 +++ b/arch/arm/lib/memcpy_rpi.S
612 +Copyright (c) 2013, Raspberry Pi Foundation
613 +Copyright (c) 2013, RISC OS Open Ltd
614 +All rights reserved.
616 +Redistribution and use in source and binary forms, with or without
617 +modification, are permitted provided that the following conditions are met:
618 + * Redistributions of source code must retain the above copyright
619 + notice, this list of conditions and the following disclaimer.
620 + * Redistributions in binary form must reproduce the above copyright
621 + notice, this list of conditions and the following disclaimer in the
622 + documentation and/or other materials provided with the distribution.
623 + * Neither the name of the copyright holder nor the
624 + names of its contributors may be used to endorse or promote products
625 + derived from this software without specific prior written permission.
627 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
628 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
629 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
630 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
631 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
632 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
633 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
634 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
635 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
636 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
639 +#include <linux/linkage.h>
640 +#include "arm-mem.h"
641 +#include "memcpymove.h"
643 +/* Prevent the stack from becoming executable */
644 +#if defined(__linux__) && defined(__ELF__)
645 +.section .note.GNU-stack,"",%progbits
656 + * void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
658 + * a1 = pointer to destination
659 + * a2 = pointer to source
660 + * a3 = number of bytes to copy
665 +.set prefetch_distance, 3
673 +++ b/arch/arm/lib/memcpymove.h
676 +Copyright (c) 2013, Raspberry Pi Foundation
677 +Copyright (c) 2013, RISC OS Open Ltd
678 +All rights reserved.
680 +Redistribution and use in source and binary forms, with or without
681 +modification, are permitted provided that the following conditions are met:
682 + * Redistributions of source code must retain the above copyright
683 + notice, this list of conditions and the following disclaimer.
684 + * Redistributions in binary form must reproduce the above copyright
685 + notice, this list of conditions and the following disclaimer in the
686 + documentation and/or other materials provided with the distribution.
687 + * Neither the name of the copyright holder nor the
688 + names of its contributors may be used to endorse or promote products
689 + derived from this software without specific prior written permission.
691 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
692 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
693 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
694 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
695 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
696 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
697 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
698 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
699 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
700 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
703 +.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
706 + mov r1, r0, lsl #32-align*8
708 + orr r1, r1, r0, lsr #align*8
711 + mov r0, r1, lsr #align*8
713 + orr r0, r0, r1, lsl #32-align*8
719 + mov r2, r0, lsl #32-align*8
721 + orr r2, r2, r1, lsr #align*8
722 + mov r1, r1, lsl #32-align*8
723 + orr r1, r1, r0, lsr #align*8
727 + mov r0, r2, lsr #align*8
729 + orr r0, r0, r1, lsl #32-align*8
730 + mov r1, r1, lsr #align*8
731 + orr r1, r1, r2, lsl #32-align*8
737 + mov r4, r0, lsl #32-align*8
739 + orr r4, r4, r3, lsr #align*8
740 + mov r3, r3, lsl #32-align*8
741 + orr r3, r3, r2, lsr #align*8
742 + mov r2, r2, lsl #32-align*8
743 + orr r2, r2, r1, lsr #align*8
744 + mov r1, r1, lsl #32-align*8
745 + orr r1, r1, r0, lsr #align*8
746 + stmdb D!, {r1, r2, r3, r4}
749 + mov r0, r4, lsr #align*8
751 + orr r0, r0, r1, lsl #32-align*8
752 + mov r1, r1, lsr #align*8
753 + orr r1, r1, r2, lsl #32-align*8
754 + mov r2, r2, lsr #align*8
755 + orr r2, r2, r3, lsl #32-align*8
756 + mov r3, r3, lsr #align*8
757 + orr r3, r3, r4, lsl #32-align*8
758 + stmia D!, {r0, r1, r2, r3}
762 + ldmdb S!, {r4, r5, r6, r7}
763 + mov r8, r0, lsl #32-align*8
764 + ldmdb S!, {r0, r1, r2, r3}
768 + orr r8, r8, r7, lsr #align*8
769 + mov r7, r7, lsl #32-align*8
770 + orr r7, r7, r6, lsr #align*8
771 + mov r6, r6, lsl #32-align*8
772 + orr r6, r6, r5, lsr #align*8
773 + mov r5, r5, lsl #32-align*8
774 + orr r5, r5, r4, lsr #align*8
775 + mov r4, r4, lsl #32-align*8
776 + orr r4, r4, r3, lsr #align*8
777 + mov r3, r3, lsl #32-align*8
778 + orr r3, r3, r2, lsr #align*8
779 + mov r2, r2, lsl #32-align*8
780 + orr r2, r2, r1, lsr #align*8
781 + mov r1, r1, lsl #32-align*8
782 + orr r1, r1, r0, lsr #align*8
783 + stmdb D!, {r5, r6, r7, r8}
784 + stmdb D!, {r1, r2, r3, r4}
786 + ldmib S!, {r1, r2, r3, r4}
787 + mov r0, r8, lsr #align*8
788 + ldmib S!, {r5, r6, r7, r8}
792 + orr r0, r0, r1, lsl #32-align*8
793 + mov r1, r1, lsr #align*8
794 + orr r1, r1, r2, lsl #32-align*8
795 + mov r2, r2, lsr #align*8
796 + orr r2, r2, r3, lsl #32-align*8
797 + mov r3, r3, lsr #align*8
798 + orr r3, r3, r4, lsl #32-align*8
799 + mov r4, r4, lsr #align*8
800 + orr r4, r4, r5, lsl #32-align*8
801 + mov r5, r5, lsr #align*8
802 + orr r5, r5, r6, lsl #32-align*8
803 + mov r6, r6, lsr #align*8
804 + orr r6, r6, r7, lsl #32-align*8
805 + mov r7, r7, lsr #align*8
806 + orr r7, r7, r8, lsl #32-align*8
807 + stmia D!, {r0, r1, r2, r3}
808 + stmia D!, {r4, r5, r6, r7}
813 +.macro memcpy_leading_15bytes backwards, align
814 + movs DAT1, DAT2, lsl #31
817 + ldrmib DAT0, [S, #-1]!
818 + ldrcsh DAT1, [S, #-2]!
819 + strmib DAT0, [D, #-1]!
820 + strcsh DAT1, [D, #-2]!
822 + ldrmib DAT0, [S], #1
823 + ldrcsh DAT1, [S], #2
824 + strmib DAT0, [D], #1
825 + strcsh DAT1, [D], #2
827 + movs DAT1, DAT2, lsl #29
829 + ldrmi DAT0, [S, #-4]!
831 + ldmcsdb S!, {DAT1, DAT2}
833 + ldrcs DAT2, [S, #-4]!
834 + ldrcs DAT1, [S, #-4]!
836 + strmi DAT0, [D, #-4]!
837 + stmcsdb D!, {DAT1, DAT2}
839 + ldrmi DAT0, [S], #4
841 + ldmcsia S!, {DAT1, DAT2}
843 + ldrcs DAT1, [S], #4
844 + ldrcs DAT2, [S], #4
846 + strmi DAT0, [D], #4
847 + stmcsia D!, {DAT1, DAT2}
851 +.macro memcpy_trailing_15bytes backwards, align
855 + ldmcsdb S!, {DAT0, DAT1}
857 + ldrcs DAT1, [S, #-4]!
858 + ldrcs DAT0, [S, #-4]!
860 + ldrmi DAT2, [S, #-4]!
861 + stmcsdb D!, {DAT0, DAT1}
862 + strmi DAT2, [D, #-4]!
865 + ldmcsia S!, {DAT0, DAT1}
867 + ldrcs DAT0, [S], #4
868 + ldrcs DAT1, [S], #4
870 + ldrmi DAT2, [S], #4
871 + stmcsia D!, {DAT0, DAT1}
872 + strmi DAT2, [D], #4
876 + ldrcsh DAT0, [S, #-2]!
877 + ldrmib DAT1, [S, #-1]
878 + strcsh DAT0, [D, #-2]!
879 + strmib DAT1, [D, #-1]
881 + ldrcsh DAT0, [S], #2
883 + strcsh DAT0, [D], #2
888 +.macro memcpy_long_inner_loop backwards, align
891 + ldr DAT0, [S, #-align]!
893 + ldr LAST, [S, #-align]!
899 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
901 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
902 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
904 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
906 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
907 + stmia D!, {DAT4, DAT5, DAT6, LAST}
910 + unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
914 + /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
915 + preload_trailing backwards, S, N, OFF
916 + add N, N, #(prefetch_distance+2)*32 - 32
920 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
921 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
922 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
924 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
925 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
926 + stmia D!, {DAT4, DAT5, DAT6, LAST}
929 + unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
936 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
937 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
939 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
940 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
944 + unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
947 + /* Trailing words and bytes */
953 + memcpy_trailing_15bytes backwards, align
955 + pop {DAT3, DAT4, DAT5, DAT6, DAT7}
956 + pop {D, DAT1, DAT2, pc}
959 +.macro memcpy_medium_inner_loop backwards, align
963 + ldmdb S!, {DAT0, DAT1, DAT2, LAST}
965 + ldr LAST, [S, #-4]!
966 + ldr DAT2, [S, #-4]!
967 + ldr DAT1, [S, #-4]!
968 + ldr DAT0, [S, #-4]!
970 + stmdb D!, {DAT0, DAT1, DAT2, LAST}
973 + ldmia S!, {DAT0, DAT1, DAT2, LAST}
980 + stmia D!, {DAT0, DAT1, DAT2, LAST}
984 + /* Trailing words and bytes */
987 + memcpy_trailing_15bytes backwards, align
989 + pop {D, DAT1, DAT2, pc}
992 +.macro memcpy_short_inner_loop backwards, align
996 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
998 + ldrne LAST, [S, #-4]!
999 + ldrne DAT2, [S, #-4]!
1000 + ldrne DAT1, [S, #-4]!
1001 + ldrne DAT0, [S, #-4]!
1003 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
1006 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
1008 + ldrne DAT0, [S], #4
1009 + ldrne DAT1, [S], #4
1010 + ldrne DAT2, [S], #4
1011 + ldrne LAST, [S], #4
1013 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
1015 + memcpy_trailing_15bytes backwards, align
1017 + pop {D, DAT1, DAT2, pc}
1020 +.macro memcpy backwards
1037 + push {D, DAT1, DAT2, lr}
1039 + .cfi_def_cfa_offset 16
1040 + .cfi_rel_offset D, 0
1043 + .cfi_undefined DAT0
1044 + .cfi_rel_offset DAT1, 4
1045 + .cfi_rel_offset DAT2, 8
1046 + .cfi_undefined LAST
1047 + .cfi_rel_offset lr, 12
1054 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1057 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
1058 + cmp N, #(prefetch_distance+3)*32 - 1
1062 + push {DAT3, DAT4, DAT5, DAT6, DAT7}
1064 + .cfi_def_cfa_offset 36
1065 + .cfi_rel_offset D, 20
1066 + .cfi_rel_offset DAT1, 24
1067 + .cfi_rel_offset DAT2, 28
1068 + .cfi_rel_offset DAT3, 0
1069 + .cfi_rel_offset DAT4, 4
1070 + .cfi_rel_offset DAT5, 8
1071 + .cfi_rel_offset DAT6, 12
1072 + .cfi_rel_offset DAT7, 16
1073 + .cfi_rel_offset lr, 32
1075 + /* Adjust N so that the decrement instruction can also test for
1076 + * inner loop termination. We want it to stop when there are
1077 + * (prefetch_distance+1) complete blocks to go. */
1078 + sub N, N, #(prefetch_distance+2)*32
1079 + preload_leading_step1 backwards, DAT0, S
1081 + /* Bug in GAS: it accepts, but mis-assembles the instruction
1082 + * ands DAT2, D, #60, 2
1083 + * which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
1090 + rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
1092 + preload_leading_step2 backwards, DAT0, S, DAT2, OFF
1093 + memcpy_leading_15bytes backwards, 1
1094 +154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
1095 + /* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
1099 + sub OFF, OFF, #32*(prefetch_distance+1)
1102 + rsb OFF, OFF, #32*prefetch_distance
1104 + movs DAT0, S, lsl #31
1108 + memcpy_long_inner_loop backwards, 0
1109 +155: memcpy_long_inner_loop backwards, 1
1110 +156: memcpy_long_inner_loop backwards, 2
1111 +157: memcpy_long_inner_loop backwards, 3
1113 + .cfi_def_cfa_offset 16
1114 + .cfi_rel_offset D, 0
1115 + .cfi_rel_offset DAT1, 4
1116 + .cfi_rel_offset DAT2, 8
1117 + .cfi_same_value DAT3
1118 + .cfi_same_value DAT4
1119 + .cfi_same_value DAT5
1120 + .cfi_same_value DAT6
1121 + .cfi_same_value DAT7
1122 + .cfi_rel_offset lr, 12
1124 +160: /* Medium case */
1125 + preload_all backwards, 0, 0, S, N, DAT2, OFF
1126 + sub N, N, #16 /* simplifies inner loop termination */
1133 + rsb DAT2, DAT2, #16
1135 + memcpy_leading_15bytes backwards, align
1136 +164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
1139 + memcpy_medium_inner_loop backwards, 0
1140 +140: memcpy_medium_inner_loop backwards, 1
1142 +170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
1145 + preload_all backwards, 1, 0, S, N, DAT2, LAST
1151 + ldrb DAT0, [S, #-1]!
1152 + strb DAT0, [D, #-1]!
1154 + ldrb DAT0, [S], #1
1155 + strb DAT0, [D], #1
1159 +174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
1162 + memcpy_short_inner_loop backwards, 0
1163 +140: memcpy_short_inner_loop backwards, 1
1182 +++ b/arch/arm/lib/memmove_rpi.S
1185 +Copyright (c) 2013, Raspberry Pi Foundation
1186 +Copyright (c) 2013, RISC OS Open Ltd
1187 +All rights reserved.
1189 +Redistribution and use in source and binary forms, with or without
1190 +modification, are permitted provided that the following conditions are met:
1191 + * Redistributions of source code must retain the above copyright
1192 + notice, this list of conditions and the following disclaimer.
1193 + * Redistributions in binary form must reproduce the above copyright
1194 + notice, this list of conditions and the following disclaimer in the
1195 + documentation and/or other materials provided with the distribution.
1196 + * Neither the name of the copyright holder nor the
1197 + names of its contributors may be used to endorse or promote products
1198 + derived from this software without specific prior written permission.
1200 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1201 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1202 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1203 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1204 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1205 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1206 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1207 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1208 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1209 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1212 +#include <linux/linkage.h>
1213 +#include "arm-mem.h"
1214 +#include "memcpymove.h"
1216 +/* Prevent the stack from becoming executable */
1217 +#if defined(__linux__) && defined(__ELF__)
1218 +.section .note.GNU-stack,"",%progbits
1223 + .object_arch armv4
1229 + * void *memmove(void *s1, const void *s2, size_t n);
1231 + * a1 = pointer to destination
1232 + * a2 = pointer to source
1233 + * a3 = number of bytes to copy
1238 +.set prefetch_distance, 3
1242 + bpl memcpy /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
1246 +++ b/arch/arm/lib/memset_rpi.S
1249 +Copyright (c) 2013, Raspberry Pi Foundation
1250 +Copyright (c) 2013, RISC OS Open Ltd
1251 +All rights reserved.
1253 +Redistribution and use in source and binary forms, with or without
1254 +modification, are permitted provided that the following conditions are met:
1255 + * Redistributions of source code must retain the above copyright
1256 + notice, this list of conditions and the following disclaimer.
1257 + * Redistributions in binary form must reproduce the above copyright
1258 + notice, this list of conditions and the following disclaimer in the
1259 + documentation and/or other materials provided with the distribution.
1260 + * Neither the name of the copyright holder nor the
1261 + names of its contributors may be used to endorse or promote products
1262 + derived from this software without specific prior written permission.
1264 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1265 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1266 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1267 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1268 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1269 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1270 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1271 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1272 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1273 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1276 +#include <linux/linkage.h>
1277 +#include "arm-mem.h"
1279 +/* Prevent the stack from becoming executable */
1280 +#if defined(__linux__) && defined(__ELF__)
1281 +.section .note.GNU-stack,"",%progbits
1286 + .object_arch armv4
1292 + * void *memset(void *s, int c, size_t n);
1294 + * a1 = pointer to buffer to fill
1295 + * a2 = byte pattern to fill with (caller-narrowed)
1296 + * a3 = number of bytes to fill
1309 + orr DAT0, DAT0, lsl #8
1311 + orr DAT0, DAT0, lsl #16
1314 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1318 +161: sub N, N, #16 /* simplifies inner loop termination */
1319 + /* Leading words and bytes */
1322 + rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
1323 + movs DAT2, DAT3, lsl #31
1325 + strmib DAT0, [S], #1
1327 + strcsh DAT0, [S], #2
1328 + movs DAT2, DAT3, lsl #29
1330 + strmi DAT0, [S], #4
1332 + stmcsia S!, {DAT0, DAT1}
1333 +164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
1336 + /* Now the inner loop of 16-byte stores */
1337 +165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
1340 +166: /* Trailing words and bytes */
1341 + movs N, N, lsl #29
1342 + stmcsia S!, {DAT0, DAT1}
1343 + strmi DAT0, [S], #4
1345 + strcsh DAT0, [S], #2
1349 +170: /* Short case */
1356 + strb DAT0, [S], #1
1360 + stmneia S!, {DAT0, DAT1, DAT2, DAT3}
1371 --- a/arch/arm/lib/uaccess_with_memcpy.c
1372 +++ b/arch/arm/lib/uaccess_with_memcpy.c
1374 #include <asm/current.h>
1375 #include <asm/page.h>
1377 +#ifndef COPY_FROM_USER_THRESHOLD
1378 +#define COPY_FROM_USER_THRESHOLD 64
1381 +#ifndef COPY_TO_USER_THRESHOLD
1382 +#define COPY_TO_USER_THRESHOLD 64
1386 pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1388 @@ -84,7 +92,44 @@ pin_page_for_write(const void __user *_a
1392 -static unsigned long noinline
1394 +pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1396 + unsigned long addr = (unsigned long)_addr;
1403 + pgd = pgd_offset(current->mm, addr);
1404 + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
1408 + pud = pud_offset(pgd, addr);
1409 + if (unlikely(pud_none(*pud) || pud_bad(*pud)))
1414 + pmd = pmd_offset(pud, addr);
1415 + if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
1418 + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
1419 + if (unlikely(!pte_present(*pte) || !pte_young(*pte))) {
1420 + pte_unmap_unlock(pte, ptl);
1430 +unsigned long noinline
1431 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
1433 unsigned long ua_flags;
1434 @@ -137,6 +182,57 @@ out:
1438 +unsigned long noinline
1439 +__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
1441 + unsigned long ua_flags;
1444 + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
1445 + memcpy(to, (const void *)from, n);
1449 + /* the mmap semaphore is taken only if not in an atomic context */
1450 + atomic = in_atomic();
1453 + down_read(&current->mm->mmap_sem);
1459 + while (!pin_page_for_read(from, &pte, &ptl)) {
1462 + up_read(&current->mm->mmap_sem);
1463 + if (__get_user(temp, (char __user *)from))
1466 + down_read(&current->mm->mmap_sem);
1469 + tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1;
1473 + ua_flags = uaccess_save_and_enable();
1474 + memcpy(to, (const void *)from, tocopy);
1475 + uaccess_restore(ua_flags);
1480 + pte_unmap_unlock(pte, ptl);
1483 + up_read(&current->mm->mmap_sem);
1490 arm_copy_to_user(void __user *to, const void *from, unsigned long n)
1492 @@ -147,7 +243,7 @@ arm_copy_to_user(void __user *to, const
1493 * With frame pointer disabled, tail call optimization kicks in
1494 * as well making this test almost invisible.
1497 + if (n < COPY_TO_USER_THRESHOLD) {
1498 unsigned long ua_flags = uaccess_save_and_enable();
1499 n = __copy_to_user_std(to, from, n);
1500 uaccess_restore(ua_flags);
1501 @@ -156,6 +252,26 @@ arm_copy_to_user(void __user *to, const
1506 +unsigned long __must_check
1507 +arm_copy_from_user(void *to, const void __user *from, unsigned long n)
1510 + * This test is stubbed out of the main function above to keep
1511 + * the overhead for small copies low by avoiding a large
1512 + * register dump on the stack just to reload them right away.
1513 + * With frame pointer disabled, tail call optimization kicks in
1514 + * as well making this test almost invisible.
1516 + if (n < COPY_TO_USER_THRESHOLD) {
1517 + unsigned long ua_flags = uaccess_save_and_enable();
1518 + n = __copy_from_user_std(to, from, n);
1519 + uaccess_restore(ua_flags);
1521 + n = __copy_from_user_memcpy(to, from, n);
1526 static unsigned long noinline
1527 __clear_user_memset(void __user *addr, unsigned long n)
1528 --- a/arch/arm/mach-bcm/Kconfig
1529 +++ b/arch/arm/mach-bcm/Kconfig
1530 @@ -174,6 +174,13 @@ config ARCH_BCM_53573
1531 The base chip is BCM53573 and there are some packaging modifications
1532 like BCM47189 and BCM47452.
1534 +config BCM2835_FAST_MEMCPY
1535 + bool "Enable optimized __copy_to_user and __copy_from_user"
1536 + depends on ARCH_BCM2835 && ARCH_MULTI_V6
1539 + Optimized versions of __copy_to_user and __copy_from_user for Pi1.
1541 config ARCH_BCM_63XX
1542 bool "Broadcom BCM63xx DSL SoC"
1543 depends on ARCH_MULTI_V7