/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 */
5 | |
/* Numerology of the numeric labels below (read the digits as WXYZ):
 * W: access width in bytes (1, 2, 4, or 8)
 * X: 0 = load, 1 = store
 * Y: location: 0 = preamble, 8 = loop body, 9 = epilog
 * Z: 0 = the instruction itself, 9 = its fault handler
 */
13 | .text |
14 | .global FUNCNAME |
15 | .type FUNCNAME, @function |
16 | .p2align 5 |
17 | FUNCNAME: |
18 | { |
19 | p0 = cmp.gtu(bytes,#0) |
20 | if (!p0.new) jump:nt .Ldone |
21 | r3 = or(dst,src) |
22 | r4 = xor(dst,src) |
23 | } |
24 | { |
25 | p1 = cmp.gtu(bytes,#15) |
26 | p0 = bitsclr(r3,#7) |
27 | if (!p0.new) jump:nt .Loop_not_aligned_8 |
28 | src_dst_sav = combine(src,dst) |
29 | } |
30 | |
31 | { |
32 | loopcount = lsr(bytes,#3) |
33 | if (!p1) jump .Lsmall |
34 | } |
35 | p3=sp1loop0(.Loop8,loopcount) |
36 | .Loop8: |
37 | 8080: |
38 | 8180: |
39 | { |
40 | if (p3) memd(dst++#8) = d_dbuf |
41 | d_dbuf = memd(src++#8) |
42 | }:endloop0 |
43 | 8190: |
44 | { |
45 | memd(dst++#8) = d_dbuf |
46 | bytes -= asl(loopcount,#3) |
47 | jump .Lsmall |
48 | } |
49 | |
50 | .Loop_not_aligned_8: |
51 | { |
52 | p0 = bitsclr(r4,#7) |
53 | if (p0.new) jump:nt .Lalign |
54 | } |
55 | { |
56 | p0 = bitsclr(r3,#3) |
57 | if (!p0.new) jump:nt .Loop_not_aligned_4 |
58 | p1 = cmp.gtu(bytes,#7) |
59 | } |
60 | |
61 | { |
62 | if (!p1) jump .Lsmall |
63 | loopcount = lsr(bytes,#2) |
64 | } |
65 | p3=sp1loop0(.Loop4,loopcount) |
66 | .Loop4: |
67 | 4080: |
68 | 4180: |
69 | { |
70 | if (p3) memw(dst++#4) = w_dbuf |
71 | w_dbuf = memw(src++#4) |
72 | }:endloop0 |
73 | 4190: |
74 | { |
75 | memw(dst++#4) = w_dbuf |
76 | bytes -= asl(loopcount,#2) |
77 | jump .Lsmall |
78 | } |
79 | |
80 | .Loop_not_aligned_4: |
81 | { |
82 | p0 = bitsclr(r3,#1) |
83 | if (!p0.new) jump:nt .Loop_not_aligned |
84 | p1 = cmp.gtu(bytes,#3) |
85 | } |
86 | |
87 | { |
88 | if (!p1) jump .Lsmall |
89 | loopcount = lsr(bytes,#1) |
90 | } |
91 | p3=sp1loop0(.Loop2,loopcount) |
92 | .Loop2: |
93 | 2080: |
94 | 2180: |
95 | { |
96 | if (p3) memh(dst++#2) = w_dbuf |
97 | w_dbuf = memuh(src++#2) |
98 | }:endloop0 |
99 | 2190: |
100 | { |
101 | memh(dst++#2) = w_dbuf |
102 | bytes -= asl(loopcount,#1) |
103 | jump .Lsmall |
104 | } |
105 | |
106 | .Loop_not_aligned: /* Works for as small as one byte */ |
107 | p3=sp1loop0(.Loop1,bytes) |
108 | .Loop1: |
109 | 1080: |
110 | 1180: |
111 | { |
112 | if (p3) memb(dst++#1) = w_dbuf |
113 | w_dbuf = memub(src++#1) |
114 | }:endloop0 |
115 | /* Done */ |
116 | 1190: |
117 | { |
118 | memb(dst) = w_dbuf |
119 | jumpr r31 |
120 | r0 = #0 |
121 | } |
122 | |
123 | .Lsmall: |
124 | { |
125 | p0 = cmp.gtu(bytes,#0) |
126 | if (p0.new) jump:nt .Loop_not_aligned |
127 | } |
128 | .Ldone: |
129 | { |
130 | r0 = #0 |
131 | jumpr r31 |
132 | } |
133 | .falign |
134 | .Lalign: |
135 | 1000: |
136 | { |
137 | if (p0.new) w_dbuf = memub(src) |
138 | p0 = tstbit(src,#0) |
139 | if (!p1) jump .Lsmall |
140 | } |
141 | 1100: |
142 | { |
143 | if (p0) memb(dst++#1) = w_dbuf |
144 | if (p0) bytes = add(bytes,#-1) |
145 | if (p0) src = add(src,#1) |
146 | } |
147 | 2000: |
148 | { |
149 | if (p0.new) w_dbuf = memuh(src) |
150 | p0 = tstbit(src,#1) |
151 | if (!p1) jump .Lsmall |
152 | } |
153 | 2100: |
154 | { |
155 | if (p0) memh(dst++#2) = w_dbuf |
156 | if (p0) bytes = add(bytes,#-2) |
157 | if (p0) src = add(src,#2) |
158 | } |
159 | 4000: |
160 | { |
161 | if (p0.new) w_dbuf = memw(src) |
162 | p0 = tstbit(src,#2) |
163 | if (!p1) jump .Lsmall |
164 | } |
165 | 4100: |
166 | { |
167 | if (p0) memw(dst++#4) = w_dbuf |
168 | if (p0) bytes = add(bytes,#-4) |
169 | if (p0) src = add(src,#4) |
170 | jump FUNCNAME |
171 | } |
172 | .size FUNCNAME,.-FUNCNAME |
173 | |