/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
 */
5
6#include <linux/linkage.h>
7
8#include <asm/ppc_asm.h>
9
/* Incoming arguments of __arch_chacha20_blocks_nostack, in argument order. */
#define	dst_bytes	r3
#define	key		r4
#define	counter		r5
#define	nblocks		r6

/*
 * idx_r0 counts the blocks still to be produced.
 * val4 reuses r4 (the key pointer) to hold the constant 4 once the key words
 * have been loaded.
 */
#define	idx_r0		r0
#define	val4		r4

/*
 * The four ChaCha constant words. Read as little-endian ASCII they spell
 * "expa", "nd 3", "2-by" and "te k".
 */
#define	const0		0x61707865
#define	const1		0x3320646e
#define	const2		0x79622d32
#define	const3		0x6b206574

/*
 * The eight 32-bit key words. key0 shares r5 with the counter pointer and
 * key1 shares r6 with nblocks, so both arguments must be saved or copied
 * before the key is loaded.
 */
#define	key0		r5
#define	key1		r6
#define	key2		r7
#define	key3		r8
#define	key4		r9
#define	key5		r10
#define	key6		r11
#define	key7		r12

/* Block counter: counter0 is the word at offset 0, counter1 the word at 4. */
#define	counter0	r14
#define	counter1	r15

/* The sixteen 32-bit words of the working ChaCha state. */
#define	state0		r16
#define	state1		r17
#define	state2		r18
#define	state3		r19
#define	state4		r20
#define	state5		r21
#define	state6		r22
#define	state7		r23
#define	state8		r24
#define	state9		r25
#define	state10		r26
#define	state11		r27
#define	state12		r28
#define	state13		r29
#define	state14		r30
#define	state15		r31
51
/*
 * qr_op4 - apply one instruction to four independent operand pairs.
 *
 * Emits "\op \dstN, \dstN, \srcN" for N = 1..4, in that order. \srcN is a
 * register for add/xor and a rotate count for rotlwi.
 */
.macro qr_op4 op, dst1, src1, dst2, src2, dst3, src3, dst4, src4
	\op	\dst1, \dst1, \src1
	\op	\dst2, \dst2, \src2
	\op	\dst3, \dst3, \src3
	\op	\dst4, \dst4, \src4
.endm

/*
 * quarterround4 - run the ChaCha quarter round on four (a, b, c, d) register
 * groups at once.
 *
 * Every step is emitted for all four groups before the next step starts, so
 * neighbouring instructions never depend on each other. All sixteen operand
 * registers are modified. No record-form instruction is used.
 */
.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4
	/* a += b; d ^= a; d = rotl32(d, 16) */
	qr_op4	add, \a1, \b1, \a2, \b2, \a3, \b3, \a4, \b4
	qr_op4	xor, \d1, \a1, \d2, \a2, \d3, \a3, \d4, \a4
	qr_op4	rotlwi, \d1, 16, \d2, 16, \d3, 16, \d4, 16

	/* c += d; b ^= c; b = rotl32(b, 12) */
	qr_op4	add, \c1, \d1, \c2, \d2, \c3, \d3, \c4, \d4
	qr_op4	xor, \b1, \c1, \b2, \c2, \b3, \c3, \b4, \c4
	qr_op4	rotlwi, \b1, 12, \b2, 12, \b3, 12, \b4, 12

	/* a += b; d ^= a; d = rotl32(d, 8) */
	qr_op4	add, \a1, \b1, \a2, \b2, \a3, \b3, \a4, \b4
	qr_op4	xor, \d1, \a1, \d2, \a2, \d3, \a3, \d4, \a4
	qr_op4	rotlwi, \d1, 8, \d2, 8, \d3, 8, \d4, 8

	/* c += d; b ^= c; b = rotl32(b, 7) */
	qr_op4	add, \c1, \d1, \c2, \d2, \c3, \d3, \c4, \d4
	qr_op4	xor, \b1, \c1, \b2, \c2, \b3, \c3, \b4, \c4
	qr_op4	rotlwi, \b1, 7, \b2, 7, \b3, 7, \b4, 7
.endm
102
/*
 * Lets call sites name state words by index: each argument is pasted onto
 * "state", and the resulting stateN alias is then expanded by the
 * preprocessor before the quarterround4 assembler macro sees it.
 */
#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4)	\
	quarterround4 state##a1 state##b1 state##c1 state##d1		\
		      state##a2 state##b2 state##c2 state##d2		\
		      state##a3 state##b3 state##c3 state##d3		\
		      state##a4 state##b4 state##c4 state##d4
108
/*
 * Very basic 32 bits implementation of ChaCha20. Produces a given positive number
 * of blocks of output with a nonce of 0, taking an input key and 8-byte
 * counter. Importantly does not spill the key or the ChaCha state to the
 * stack: the only values stored relative to r1 are the caller's r14-r31 and
 * the counter pointer. Its arguments are:
 *
 *	r3: output bytes
 *	r4: 32-byte key input
 *	r5: 8-byte counter input/output (pointer saved on stack, r5 is reused)
 *	r6: number of 64-byte blocks to write to output. Must not be 0: the
 *	    block loop only tests the count after a block has been produced.
 *
 * Register usage inside the function:
 *
 *	r0: counter of blocks (initialised with r6)
 *	r4: Value '4' after key has been read.
 *	r5-r12: key
 *	r14-r15: counter
 *	r16-r31: state (r31 also briefly carries the iteration count for CTR)
 *
 * On return the incremented counter has been stored back through the counter
 * pointer, r5 holds that pointer again, r6-r12 are zero and r14-r31 have been
 * reloaded with the values saved on entry.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
	/*
	 * Save the counter pointer (r5 is about to become key0) and r14-r31,
	 * which are used for the counter and the state.
	 */
#ifdef __powerpc64__
	/* 64-bit: stored at negative offsets from r1; r1 itself is not moved. */
	std	counter, -216(r1)

	std	r14, -144(r1)
	std	r15, -136(r1)
	std	r16, -128(r1)
	std	r17, -120(r1)
	std	r18, -112(r1)
	std	r19, -104(r1)
	std	r20, -96(r1)
	std	r21, -88(r1)
	std	r22, -80(r1)
	std	r23, -72(r1)
	std	r24, -64(r1)
	std	r25, -56(r1)
	std	r26, -48(r1)
	std	r27, -40(r1)
	std	r28, -32(r1)
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)
#else
	/* 32-bit: 96-byte frame, counter pointer at 20, r14-r31 at 24..92. */
	stwu	r1, -96(r1)
	stw	counter, 20(r1)
#ifdef __BIG_ENDIAN__
	/* Store-multiple form: r14 through r31 in one instruction. */
	stmw	r14, 24(r1)
#else
	stw	r14, 24(r1)
	stw	r15, 28(r1)
	stw	r16, 32(r1)
	stw	r17, 36(r1)
	stw	r18, 40(r1)
	stw	r19, 44(r1)
	stw	r20, 48(r1)
	stw	r21, 52(r1)
	stw	r22, 56(r1)
	stw	r23, 60(r1)
	stw	r24, 64(r1)
	stw	r25, 68(r1)
	stw	r26, 72(r1)
	stw	r27, 76(r1)
	stw	r28, 80(r1)
	stw	r29, 84(r1)
	stw	r30, 88(r1)
	stw	r31, 92(r1)
#endif
#endif /* __powerpc64__ */

	/* Fetch both counter words while r5 still points at them. */
	lwz	counter0, 0(counter)
	lwz	counter1, 4(counter)
#ifdef __powerpc64__
	/*
	 * Copy counter1 into the upper 32 bits of counter0 so that a single
	 * 64-bit add increments the whole counter.
	 */
	rldimi	counter0, counter1, 32, 0
#endif
	/* Move the block count out of r6 before r6 is overwritten by key1. */
	mr	idx_r0, nblocks
	/*
	 * Bias the output pointer by -4: the first word of each block is
	 * stored at dst_bytes + 4, and the store sequence leaves dst_bytes
	 * advanced by exactly 64.
	 */
	subi	dst_bytes, dst_bytes, 4

	/* Load the key. This overwrites r5 (counter pointer) and r6 (nblocks). */
	lwz	key0, 0(key)
	lwz	key1, 4(key)
	lwz	key2, 8(key)
	lwz	key3, 12(key)
	lwz	key4, 16(key)
	lwz	key5, 20(key)
	lwz	key6, 24(key)
	lwz	key7, 28(key)

	/* The key pointer is dead now; r4 becomes the constant 4. */
	li	val4, 4
.Lblock:
	/*
	 * Iteration count for .Lpermute. r31 is state15, which is only
	 * initialised after the value has been moved to CTR.
	 */
	li	r31, 10

	/* state0..3 = const0..3, built from the high-adjusted and low halves. */
	lis	state0, const0@ha
	lis	state1, const1@ha
	lis	state2, const2@ha
	lis	state3, const3@ha
	addi	state0, state0, const0@l
	addi	state1, state1, const1@l
	addi	state2, state2, const2@l
	addi	state3, state3, const3@l

	mtctr	r31

	/* state4..11 = key */
	mr	state4, key0
	mr	state5, key1
	mr	state6, key2
	mr	state7, key3
	mr	state8, key4
	mr	state9, key5
	mr	state10, key6
	mr	state11, key7

	/* state12..13 = block counter */
	mr	state12, counter0
	mr	state13, counter1

	/* state14..15 = nonce, always 0 */
	li	state14, 0
	li	state15, 0

	/*
	 * Each iteration does one pass over the four columns followed by one
	 * pass over the four diagonals; CTR runs this 10 times.
	 */
.Lpermute:
	QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15)
	QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14)

	bdnz	.Lpermute

	/*
	 * Add the initial state back in. The constants are added as immediates
	 * in two halves; state14/15 started at 0 and need no addition.
	 */
	addis	state0, state0, const0@ha
	addis	state1, state1, const1@ha
	addis	state2, state2, const2@ha
	addis	state3, state3, const3@ha
	addi	state0, state0, const0@l
	addi	state1, state1, const1@l
	addi	state2, state2, const2@l
	addi	state3, state3, const3@l

	add	state4, state4, key0
	add	state5, state5, key1
	add	state6, state6, key2
	add	state7, state7, key3
	add	state8, state8, key4
	add	state9, state9, key5
	add	state10, state10, key6
	add	state11, state11, key7

	add	state12, state12, counter0
	add	state13, state13, counter1

	/* Write the 16 state words to the output buffer. */
#ifdef __BIG_ENDIAN__
	/*
	 * Byte-reversed stores only exist in indexed form, so the address
	 * alternates between val4 + dst_bytes and 0 + dst_bytes, with
	 * dst_bytes bumped by 8 in between. The 0 in the base slot is a
	 * literal zero, not r0, which is holding the block count.
	 */
	stwbrx	state0, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state1, 0, dst_bytes
	stwbrx	state2, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state3, 0, dst_bytes
	stwbrx	state4, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state5, 0, dst_bytes
	stwbrx	state6, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state7, 0, dst_bytes
	stwbrx	state8, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state9, 0, dst_bytes
	stwbrx	state10, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state11, 0, dst_bytes
	stwbrx	state12, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state13, 0, dst_bytes
	stwbrx	state14, val4, dst_bytes
	addi	dst_bytes, dst_bytes, 8
	stwbrx	state15, 0, dst_bytes
#else
	/* Plain stores; the final update-form store advances dst_bytes by 64. */
	stw	state0, 4(dst_bytes)
	stw	state1, 8(dst_bytes)
	stw	state2, 12(dst_bytes)
	stw	state3, 16(dst_bytes)
	stw	state4, 20(dst_bytes)
	stw	state5, 24(dst_bytes)
	stw	state6, 28(dst_bytes)
	stw	state7, 32(dst_bytes)
	stw	state8, 36(dst_bytes)
	stw	state9, 40(dst_bytes)
	stw	state10, 44(dst_bytes)
	stw	state11, 48(dst_bytes)
	stw	state12, 52(dst_bytes)
	stw	state13, 56(dst_bytes)
	stw	state14, 60(dst_bytes)
	stwu	state15, 64(dst_bytes)
#endif

	/*
	 * One block done. CR0 set here is consumed by the bne below; none of
	 * the counter-increment instructions in between is a record form.
	 */
	subic.	idx_r0, idx_r0, 1	/* subi. can't use r0 as source */

	/* Increment the 64-bit block counter. */
#ifdef __powerpc64__
	/* counter0 holds all 64 bits; refresh counter1 from its upper half. */
	addi	counter0, counter0, 1
	srdi	counter1, counter0, 32
#else
	/* Add 1 to the low word and propagate the carry into the high word. */
	addic	counter0, counter0, 1
	addze	counter1, counter1
#endif

	bne	.Lblock

	/* Reload the counter pointer (this overwrites key0) and write back. */
#ifdef __powerpc64__
	ld	counter, -216(r1)
#else
	lwz	counter, 20(r1)
#endif
	stw	counter0, 0(counter)
	stw	counter1, 4(counter)

	/* Wipe key1..key7 from r6-r12; r5 already holds the pointer again. */
	li	r6, 0
	li	r7, 0
	li	r8, 0
	li	r9, 0
	li	r10, 0
	li	r11, 0
	li	r12, 0

	/* Reload r14-r31, replacing the counter and state they held. */
#ifdef __powerpc64__
	ld	r14, -144(r1)
	ld	r15, -136(r1)
	ld	r16, -128(r1)
	ld	r17, -120(r1)
	ld	r18, -112(r1)
	ld	r19, -104(r1)
	ld	r20, -96(r1)
	ld	r21, -88(r1)
	ld	r22, -80(r1)
	ld	r23, -72(r1)
	ld	r24, -64(r1)
	ld	r25, -56(r1)
	ld	r26, -48(r1)
	ld	r27, -40(r1)
	ld	r28, -32(r1)
	ld	r29, -24(r1)
	ld	r30, -16(r1)
	ld	r31, -8(r1)
#else
#ifdef __BIG_ENDIAN__
	lmw	r14, 24(r1)
#else
	lwz	r14, 24(r1)
	lwz	r15, 28(r1)
	lwz	r16, 32(r1)
	lwz	r17, 36(r1)
	lwz	r18, 40(r1)
	lwz	r19, 44(r1)
	lwz	r20, 48(r1)
	lwz	r21, 52(r1)
	lwz	r22, 56(r1)
	lwz	r23, 60(r1)
	lwz	r24, 64(r1)
	lwz	r25, 68(r1)
	lwz	r26, 72(r1)
	lwz	r27, 76(r1)
	lwz	r28, 80(r1)
	lwz	r29, 84(r1)
	lwz	r30, 88(r1)
	lwz	r31, 92(r1)
#endif
	/* Release the 96-byte frame allocated on entry. */
	addi	r1, r1, 96
#endif /* __powerpc64__ */
	blr
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
366

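/*
 * Code-browser page footer captured together with the listing; it is not
 * part of the assembly source:
 *
 * Provided by KDAB
 *
 * Privacy Policy
 * Improve your Profiling and Debugging skills
 * Find out more
 *
 * source code of linux/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
 */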