summaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/string.c
blob: 6c66431ed93b3d4943954f0e07a04a0acf665c77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
 * This file is part of the GNU C Library.
 * Copyright (c) 2011 The Chromium OS Authors.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/* From glibc-2.14, sysdeps/i386/memset.c */

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/string.h>

/* Unit used for bulk fills and copies in this file: one 32-bit longword. */
typedef uint32_t op_t;

/**
 * memset() - fill a memory region with a byte value
 *
 * @dstpp:	start of the region to fill
 * @c:		fill value; only its low 8 bits are used (it is cast to
 *		unsigned char)
 * @len:	number of bytes to fill
 *
 * Regions of 12 bytes or more are filled in three steps: single bytes until
 * the destination address is a multiple of sizeof(op_t), then whole longwords
 * with "rep stosl", then the bytes left over. Shorter regions are filled with
 * the final byte-wise "rep stosb" only.
 *
 * Return: @dstpp, unchanged
 */
void *memset(void *dstpp, int c, size_t len)
{
	/*
	 * Receives the count register after each "rep". It is never read; it
	 * is an output operand so the compiler knows the register is modified.
	 */
	int d0;
	unsigned long int dstp = (unsigned long int) dstpp;

	/* This explicit register allocation improves code very much indeed. */
	register op_t x asm("ax");

	x = (unsigned char) c;

	/* Clear the direction flag, so filling will move forward.  */
	asm volatile("cld");

	/* This threshold value is optimal.  */
	if (len >= 12) {
		/* Fill X with four copies of the char we want to fill with. */
		x |= (x << 8);
		x |= (x << 16);

		/* Adjust LEN for the bytes handled in the first loop.  */
		len -= (-dstp) % sizeof(op_t);

		/*
		 * There are at least some bytes to set. No need to test for
		 * LEN == 0 in this alignment loop.
		 */

		/*
		 * Fill bytes until DSTP is aligned on a longword boundary.
		 * The inputs are tied to the outputs ("0", "1"), so DSTP comes
		 * back holding the address just past the bytes written.
		 */
		asm volatile(
			"rep\n"
			"stosb" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
			"memory");

		/* Fill longwords.  */
		asm volatile(
			"rep\n"
			"stosl" /* %0, %2, %3 */ :
			"=D" (dstp), "=c" (d0) :
			"0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
			"memory");
		/* Only the sub-longword remainder is left for the tail fill. */
		len %= sizeof(op_t);
	}

	/* Write the last few bytes. */
	asm volatile(
		"rep\n"
		"stosb" /* %0, %2, %3 */ :
		"=D" (dstp), "=c" (d0) :
		"0" (dstp), "1" (len), "a" (x) :
		"memory");

	return dstpp;
}

/* memcpy() uses the longword copy path once the length reaches this value. */
#define	OP_T_THRES	8
/* Size of one op_t, in bytes. */
#define OPSIZ	(sizeof(op_t))

/*
 * BYTE_COPY_FWD() - copy NBYTES bytes forward with "rep movsb"
 *
 * DST_BP and SRC_BP must be lvalues holding the destination and source
 * addresses. They are both inputs and outputs of the asm statement, so after
 * the copy each one holds the address just past the bytes that were copied.
 * NBYTES is loaded into the count register; the count left there afterwards
 * is stored in the scratch variable __d0 and discarded.
 */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				  \
do {									  \
	int __d0;							  \
	asm volatile(							  \
		/* Clear the direction flag, so copying goes forward.  */ \
		"cld\n"							  \
		/* Copy bytes.  */					  \
		"rep\n"							  \
		"movsb" :						  \
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
		"0" (dst_bp), "1" (src_bp), "2" (nbytes) :		  \
		"memory");						  \
} while (0)

/*
 * WORD_COPY_FWD() - copy as many whole longwords of NBYTES as fit, forward,
 * with "rep movsl"
 *
 * DST_BP and SRC_BP must be lvalues holding the destination and source
 * addresses; they are updated by the asm statement to point just past the
 * data copied. NBYTES / 4 longwords are copied (4 being the number of bytes
 * one "movsl" moves) and NBYTES % 4, the byte count still to be copied, is
 * stored in NBYTES_LEFT.
 *
 * NBYTES appears twice in the expansion, so it must not have side effects.
 * Passing the same lvalue as NBYTES_LEFT and NBYTES is fine: the asm reads
 * NBYTES before the remainder is assigned.
 */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		  \
do {									  \
	int __d0;							  \
	asm volatile(							  \
		/* Clear the direction flag, so copying goes forward.  */ \
		"cld\n"							  \
		/* Copy longwords.  */					  \
		"rep\n"							  \
		"movsl" :						  \
		"=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :		  \
		"0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :	  \
		"memory");						  \
	(nbytes_left) = (nbytes) % 4;					  \
} while (0)

/**
 * memcpy() - copy a block of memory, front to back
 *
 * @dstpp:	destination buffer
 * @srcpp:	source buffer
 * @len:	number of bytes to copy
 *
 * Copies shorter than OP_T_THRES bytes are done byte by byte. Longer copies
 * first move the few bytes needed to bring the destination address up to a
 * multiple of OPSIZ, then move whole longwords, and finally move whatever
 * bytes remain.
 *
 * Return: @dstpp, unchanged
 */
void *memcpy(void *dstpp, const void *srcpp, size_t len)
{
	unsigned long int dst_addr = (long int)dstpp;
	unsigned long int src_addr = (long int)srcpp;

	/* If there are enough bytes to make it worthwhile, use word copy. */
	if (len >= OP_T_THRES) {
		/* Bytes to copy before the destination is OPSIZ-aligned. */
		unsigned long int head = (-dst_addr) % OPSIZ;

		len -= head;
		BYTE_COPY_FWD(dst_addr, src_addr, head);

		/*
		 * The destination is now aligned: copy whole longwords.
		 * The number of bytes still to copy is written back to LEN.
		 */
		WORD_COPY_FWD(dst_addr, src_addr, len, len);
	}

	/* Either a short copy or the tail of a long one: byte operations. */
	BYTE_COPY_FWD(dst_addr, src_addr, len);

	return dstpp;
}