copy_user_uncached_64.S source code [linux/arch/x86/lib/copy_user_uncached_64.S]

1	/* SPDX-License-Identifier: GPL-2.0-only */
2	/*
3	* Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
4	*/
5
6	#include <linux/export.h>
7	#include <linux/linkage.h>
8	#include <asm/asm.h>
9
/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 *
 * Registers written by this code: rax, rdx, rsi, rdi, r8-r11 and
 * the flags. rsi/rdi are advanced and edx is decremented as bytes
 * are copied, so at every exit edx is the number of bytes that were
 * not copied.
 *
 * Numeric local labels mark every instruction that has an exception
 * table entry:
 *   10-13 / 30-33	loads of the unrolled loop (first / second half)
 *   20-23 / 40-43	stores of the unrolled loop (first / second half)
 *   50-51		quadword loop load / store
 *   52-53		4-byte retry after a failed quadword load
 *   60-61, 70-71, 80-81	4-, 2- and 1-byte tail load / store
 *   90-95		head alignment loads / stores
 */
SYM_FUNC_START(__copy_user_nocache)
	/* If destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	/* Fewer than 64 bytes left: skip the unrolled loop */
	cmp $64,%edx
	jb .Lquadwords

	.p2align 4,0x90
.Lunrolled:
	/*
	 * 64 bytes per iteration, as two groups of four loads
	 * followed by four non-temporal stores.
	 */
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	/* Pointers and count are only updated once all 64 bytes are done */
	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

	/*
	 * First set of user mode loads have been done
	 * without any stores, so if they fail, we can
	 * just try the non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(10b, .Lquadwords)
	_ASM_EXTABLE_UA(11b, .Lquadwords)
	_ASM_EXTABLE_UA(12b, .Lquadwords)
	_ASM_EXTABLE_UA(13b, .Lquadwords)

	/*
	 * The second set of user mode loads have been
	 * done with 32 bytes stored to the destination,
	 * so we need to take that into account before
	 * falling back to the non-unrolled quadword loop.
	 */
	_ASM_EXTABLE_UA(30b, .Lfixup32)
	_ASM_EXTABLE_UA(31b, .Lfixup32)
	_ASM_EXTABLE_UA(32b, .Lfixup32)
	_ASM_EXTABLE_UA(33b, .Lfixup32)

	/*
	 * An exception on a write means that we're
	 * done, but we need to update the count
	 * depending on where in the unrolled loop
	 * we were.
	 */
	_ASM_EXTABLE_UA(20b, .Ldone0)
	_ASM_EXTABLE_UA(21b, .Ldone8)
	_ASM_EXTABLE_UA(22b, .Ldone16)
	_ASM_EXTABLE_UA(23b, .Ldone24)
	_ASM_EXTABLE_UA(40b, .Ldone32)
	_ASM_EXTABLE_UA(41b, .Ldone40)
	_ASM_EXTABLE_UA(42b, .Ldone48)
	_ASM_EXTABLE_UA(43b, .Ldone56)

.Lquadwords:
	/* Copy 8 bytes at a time for as long as at least 8 remain */
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

	/*
	 * If we fail on the last full quadword, we will
	 * not try to do any byte-wise cached accesses.
	 * We will try to do one more 4-byte uncached
	 * one, though.
	 */
	_ASM_EXTABLE_UA(50b, .Llast4)
	_ASM_EXTABLE_UA(51b, .Ldone0)

.Llong:
	/*
	 * Fewer than 8 bytes remain here, so bits 2, 1 and 0 of the
	 * count select the 4-, 2- and 1-byte tail pieces in turn.
	 */
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	/*
	 * Last non-temporal store is behind us: fence before the
	 * remaining tail, which only uses regular stores.
	 */
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	/* Final byte: the pointers are not needed any more */
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

	/*
	 * If we fail on the last four bytes, we won't
	 * bother with any fixups. It's dead, Jim. Note
	 * that there's no need for 'sfence' for any
	 * of this, since the exception will have been
	 * serializing.
	 */
	_ASM_EXTABLE_UA(60b, .Ldone)
	_ASM_EXTABLE_UA(61b, .Ldone)
	_ASM_EXTABLE_UA(70b, .Ldone)
	_ASM_EXTABLE_UA(71b, .Ldone)
	_ASM_EXTABLE_UA(80b, .Ldone)
	_ASM_EXTABLE_UA(81b, .Ldone)

	/*
	 * This is the "head needs aligning" case when
	 * the destination isn't 8-byte aligned. The
	 * 4-byte case can be done uncached, but any
	 * smaller alignment is done with regular stores.
	 *
	 * Each step first checks that enough bytes remain;
	 * if not, it jumps into the matching tail step.
	 */
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

	/*
	 * If we fail on the initial alignment accesses,
	 * we're all done. Again, no point in trying to
	 * do byte-by-byte probing if the 4-byte load
	 * fails - we're not doing any uncached accesses
	 * any more.
	 */
	_ASM_EXTABLE_UA(90b, .Ldone)
	_ASM_EXTABLE_UA(91b, .Ldone)
	_ASM_EXTABLE_UA(92b, .Ldone)
	_ASM_EXTABLE_UA(93b, .Ldone)
	_ASM_EXTABLE_UA(94b, .Ldone)
	_ASM_EXTABLE_UA(95b, .Ldone)

	/*
	 * Exception table fixups for faults in the middle
	 * of the unrolled loop. .LdoneN is entered when N
	 * bytes of the current 64-byte chunk were already
	 * stored; falling through the chain subtracts 8
	 * from the count for each of those stores.
	 */
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

	/*
	 * A load in the second half of the unrolled loop
	 * failed: the first 32 bytes of this chunk have been
	 * stored, so account for them and retry the rest
	 * one quadword at a time.
	 */
.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

	/*
	 * The 8-byte load in the quadword loop failed: try one
	 * 4-byte copy from the same address. If that works,
	 * four more bytes count as copied; either way we return.
	 */
.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
	_ASM_EXTABLE_UA(52b, .Ldone0)
	_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
243

source code of linux/arch/x86/lib/copy_user_uncached_64.S