Consolidate arch-specific mem_malloc_init() implementations
[oweals/u-boot.git] / lib_blackfin / memcpy.S
1 /*
2  * File: memcpy.S
3  *
4  * Copyright 2004-2007 Analog Devices Inc.
5  * Enter bugs at http://blackfin.uclinux.org/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see the file COPYING, or write
19  * to the Free Software Foundation, Inc.,
20  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22
.align 2

/*
 * _memcpy_ASM - copy a block of memory, tolerating overlap
 *
 * In:   R0 = dst, R1 = src, R2 = length in bytes
 * Out:  R0 = dst (never modified, on every exit path)
 * Uses: R1, R3, P0, P1, P2, I1, CC and hardware loop 0 (LC0/LT0/LB0)
 *
 * Strategy:
 *   - length <= 0 (signed compare): return immediately.
 *   - src < dst < src + length: copy backwards, one byte at a time, so
 *     the tail of src is read before dst overwrites it.
 *   - otherwise, if both pointers are 4-byte aligned and there are at
 *     least three whole words: word copy, then a byte copy for the
 *     remaining 0-3 bytes.
 *   - otherwise: forward byte copy.
 *
 * NOTE: the pointer and length compares are signed, so a buffer that
 * straddles address 0x80000000 is not classified correctly.
 */
.globl _memcpy_ASM;
.type _memcpy_ASM, STT_FUNC;
_memcpy_ASM:
        CC = R2 <=  0;  /* length not positive? */
        IF CC JUMP  .L_P1L2147483647;   /* Nothing to do */

        P0 = R0 ;       /* dst (write pointer) */
        P1 = R1 ;       /* src (read pointer) */
        P2 = R2 ;       /* length, used as loop count */

        /* check for overlapping data */
        CC = R1 < R0;   /* src < dst */
        IF !CC JUMP .Lno_overlap;
        R3 = R1 + R2;
        CC = R0 < R3;   /* and dst < src+len */
        IF CC JUMP .Lhas_overlap;

.Lno_overlap:
        /* Check for aligned data. */

        R3 = R1 | R0;
        R1 = 0x3;       /* mask goes in R1: src is already in P1, and
                         * R0 must keep dst for the return value */
        R3 = R3 & R1;
        CC = R3;        /* low bits set on either address? */
        IF CC JUMP .Lnot_aligned;

        /* Both addresses are word-aligned, so we can copy
         * at least part of the data using word copies. */
        P2 = P2 >> 2;   /* P2 = number of whole words */
        CC = P2 <= 2;
        IF !CC JUMP .Lmore_than_seven;
        /* Two whole words or fewer (at most 11 bytes): not worth
         * the word loop, copy everything bytewise. */
        P2 = R2;
        LSETUP(.Lthree_start, .Lthree_end) LC0=P2;
.Lthree_start:
        R3 = B[P1++] (X);
.Lthree_end:
        B[P0++] = R3;

        RTS;

.Lmore_than_seven:
        /* At least three whole words (12 bytes) to copy. */
        P2 += -1;       /* because we unroll one iteration */
        LSETUP(.Lword_loop, .Lword_loop) LC0=P2;
        I1 = P1;
        R3 = [I1++];    /* preload the first word */
.Lword_loop:
        /* store the previous word while loading the next one */
        MNOP || [P0++] = R3 || R3 = [I1++];

        [P0++] = R3;    /* store the last word loaded by the loop */
        /* Any remaining bytes to copy? */
        R3 = 0x3;
        R3 = R2 & R3;   /* R3 = length % 4 */
        CC = R3 == 0;
        P1 = I1;        /* in case there's something left, */
        IF !CC JUMP .Lbytes_left;
        RTS;
.Lbytes_left:   P2 = R3;        /* 1-3 trailing bytes */
.Lnot_aligned:
        /* From here, we're copying byte-by-byte. */
        LSETUP (.Lbyte_start , .Lbyte_end) LC0=P2;
.Lbyte_start:
        R1 = B[P1++] (X);
.Lbyte_end:
        B[P0++] = R1;

.L_P1L2147483647:
        RTS;

.Lhas_overlap:
/* Need to reverse the copying, because the
 * dst would clobber the src.
 * Don't bother to work out alignment for
 * the reverse case.
 */
        P0 = P0 + P2;
        P0 += -1;       /* P0 = last byte of dst */
        P1 = P1 + P2;
        P1 += -1;       /* P1 = last byte of src */
        LSETUP(.Lover_start, .Lover_end) LC0=P2;
.Lover_start:
        R1 = B[P1--] (X);
.Lover_end:
        B[P0--] = R1;

        RTS;

.size _memcpy_ASM, .-_memcpy_ASM