2 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
3 * This file is part of the GNU C Library.
4 * Copyright (c) 2011 The Chromium OS Authors.
6 * SPDX-License-Identifier: GPL-2.0+
9 /* From glibc-2.14, sysdeps/i386/memset.c */
11 #include <linux/types.h>
12 #include <linux/compiler.h>
13 #include <asm/string.h>
/*
 * Word type used by the fill and copy code in this file: sizeof(op_t) is
 * the alignment unit and the size of one bulk store.
 */
15 typedef uint32_t op_t;
/*
 * memset - fill a memory region with one byte value using x86 string stores.
 * @dstpp: start of the region to fill
 * @c: fill value; only its low byte is used (it is cast to unsigned char)
 * @len: size of the region in bytes
 *
 * The visible steps are: byte stores until the destination is aligned to
 * sizeof(op_t), then longword stores for len / sizeof(op_t) units, then
 * byte stores for the remaining tail.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably dstpp is returned, as for the standard memset - confirm.
 */
17 void *memset(void *dstpp, int c, size_t len)
/* Integer copy of the destination so the asm blocks can read and update it. */
20 unsigned long int dstp = (unsigned long int) dstpp;
22 /* This explicit register allocation improves code very much indeed. */
23 register op_t x asm("ax");
/* Start with the fill byte zero-extended to op_t. */
25 x = (unsigned char) c;
27 /* Clear the direction flag, so filling will move forward. */
30 /* This threshold value is optimal. */
32 /* Fill X with four copies of the char we want to fill with. */
36 /* Adjust LEN for the bytes handled in the first loop. */
/*
 * (-dstp) % sizeof(op_t) is the distance in bytes from dstp up to the next
 * op_t boundary (0 when dstp is already aligned).
 */
37 len -= (-dstp) % sizeof(op_t);
40 * There are at least some bytes to set. No need to test for
41 * LEN == 0 in this alignment loop.
44 /* Fill bytes until DSTP is aligned on a longword boundary. */
/*
 * Operands for each store block below: D = destination pointer (input and
 * output), c = element count (output discarded into d0), a = fill value x.
 */
47 "stosb" /* %0, %2, %3 */ :
48 "=D" (dstp), "=c" (d0) :
49 "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
/* Fill the aligned middle part one longword at a time. */
55 "stosl" /* %0, %2, %3 */ :
56 "=D" (dstp), "=c" (d0) :
57 "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
62 /* Write the last few bytes. */
65 "stosb" /* %0, %2, %3 */ :
66 "=D" (dstp), "=c" (d0) :
67 "0" (dstp), "1" (len), "a" (x) :
/*
 * Helpers for the forward memory copy.
 *
 * OPSIZ: size in bytes of one op_t word.
 *
 * BYTE_COPY_FWD(dst_bp, src_bp, nbytes): copy nbytes bytes forward.
 *   dst_bp and src_bp are both inputs and outputs of the asm (D and S
 *   registers), so they must be modifiable lvalues and are written back by
 *   the copy; nbytes is loaded into the c register as the count.
 *
 * WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes): copy (nbytes) / 4
 *   longwords forward with the same operand layout, then store
 *   (nbytes) % 4, the bytes still to be copied, in nbytes_left.
 *   NOTE(review): the literal 4 matches OPSIZ only while op_t is a 32-bit
 *   type.
 */
74 #define OPSIZ (sizeof(op_t))
76 #define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
80 /* Clear the direction flag, so copying goes forward. */ \
85 "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
86 "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
90 #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
94 /* Clear the direction flag, so copying goes forward. */ \
96 /* Copy longwords. */ \
99 "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
100 "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \
102 (nbytes_left) = (nbytes) % 4; \
/*
 * memcpy - copy len bytes from srcpp to dstpp, from the beginning to the end.
 * @dstpp: destination buffer
 * @srcpp: source buffer
 * @len: number of bytes to copy
 *
 * When len >= OP_T_THRES, the destination is first byte-copied up to an
 * OPSIZ boundary and the bulk is moved with WORD_COPY_FWD; whatever is left
 * (or the whole request, for short copies) goes through BYTE_COPY_FWD.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably dstpp is returned, as for the standard memcpy - confirm.
 */
105 void *memcpy(void *dstpp, const void *srcpp, size_t len)
/* Integer copies of the pointers; the copy macros write these back. */
107 unsigned long int dstp = (long int)dstpp;
108 unsigned long int srcp = (long int)srcpp;
110 /* Copy from the beginning to the end. */
112 /* If there are enough bytes to copy, use word copy. */
113 if (len >= OP_T_THRES) {
114 /* Copy just a few bytes to make DSTP aligned. */
/*
 * (-dstp) % OPSIZ is the distance in bytes from dstp to the next OPSIZ
 * boundary. LEN is reduced first, because BYTE_COPY_FWD writes dstp.
 */
115 len -= (-dstp) % OPSIZ;
116 BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
118 /* Copy from SRCP to DSTP taking advantage of the known
119 * alignment of DSTP. Number of bytes remaining is put
120 * in the third argument, i.e. in LEN. This number may
121 * vary from machine to machine.
123 WORD_COPY_FWD(dstp, srcp, len, len);
125 /* Fall out and copy the tail. */
128 /* There are just a few bytes to copy. Use byte memory operations. */
129 BYTE_COPY_FWD(dstp, srcp, len);
134 void *memmove(void *dest, const void *src, size_t n)
136 int d0, d1, d2, d3, d4, d5;
139 __asm__ __volatile__(
140 /* Handle 16 bytes or more in a loop */
144 /* Decide forward/backward copy mode */
149 * The movs instruction has a high startup latency,
150 * so we handle small sizes with general registers.
154 /* The movs instruction is only efficient in the aligned case */
162 /* We gobble 16 bytes forward in each loop */
165 "mov 0*4(%1), %3\n\t"
166 "mov 1*4(%1), %4\n\t"
167 "mov %3, 0*4(%2)\n\t"
168 "mov %4, 1*4(%2)\n\t"
169 "mov 2*4(%1), %3\n\t"
170 "mov 3*4(%1), %4\n\t"
171 "mov %3, 2*4(%2)\n\t"
172 "mov %4, 3*4(%2)\n\t"
173 "lea 0x10(%1), %1\n\t"
174 "lea 0x10(%2), %2\n\t"
179 /* Handle data forward by movs */
182 "mov -4(%1, %0), %3\n\t"
183 "lea -4(%2, %0), %4\n\t"
188 /* Handle data backward by movs */
193 "lea -4(%1, %0), %1\n\t"
194 "lea -4(%2, %0), %2\n\t"
202 /* Start to prepare for backward copy */
212 /* Calculate copy position to tail */
218 /* We gobble 16 bytes backward in each loop */
222 "mov -1*4(%1), %3\n\t"
223 "mov -2*4(%1), %4\n\t"
224 "mov %3, -1*4(%2)\n\t"
225 "mov %4, -2*4(%2)\n\t"
226 "mov -3*4(%1), %3\n\t"
227 "mov -4*4(%1), %4\n\t"
228 "mov %3, -3*4(%2)\n\t"
229 "mov %4, -4*4(%2)\n\t"
230 "lea -0x10(%1), %1\n\t"
231 "lea -0x10(%2), %2\n\t"
233 /* Calculate copy position to head */
238 /* Move data from 8 bytes to 15 bytes */
243 "mov 0*4(%1), %3\n\t"
244 "mov 1*4(%1), %4\n\t"
245 "mov -2*4(%1, %0), %5\n\t"
246 "mov -1*4(%1, %0), %1\n\t"
248 "mov %3, 0*4(%2)\n\t"
249 "mov %4, 1*4(%2)\n\t"
250 "mov %5, -2*4(%2, %0)\n\t"
251 "mov %1, -1*4(%2, %0)\n\t"
254 /* Move data from 4 bytes to 7 bytes */
259 "mov 0*4(%1), %3\n\t"
260 "mov -1*4(%1, %0), %4\n\t"
261 "mov %3, 0*4(%2)\n\t"
262 "mov %4, -1*4(%2, %0)\n\t"
265 /* Move data from 2 bytes to 3 bytes */
270 "movw 0*2(%1), %%dx\n\t"
271 "movw -1*2(%1, %0), %%bx\n\t"
272 "movw %%dx, 0*2(%2)\n\t"
273 "movw %%bx, -1*2(%2, %0)\n\t"
276 /* Move data for 1 byte */
281 "movb (%1), %%cl\n\t"
282 "movb %%cl, (%2)\n\t"
285 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
286 "=r" (d3), "=r" (d4), "=r"(d5)