1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * arch/parisc/lib/io.c |
4 | * |
5 | * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard |
6 | * Copyright (c) Randolph Chung 2001 <tausq@debian.org> |
7 | * |
8 | * IO accessing functions which shouldn't be inlined because they're too big |
9 | */ |
10 | |
11 | #include <linux/kernel.h> |
12 | #include <linux/module.h> |
13 | #include <asm/io.h> |
14 | |
15 | /* Copies a block of memory to a device in an efficient manner. |
16 | * Assumes the device can cope with 32-bit transfers. If it can't, |
17 | * don't use this function. |
18 | */ |
19 | void memcpy_toio(volatile void __iomem *dst, const void *src, int count) |
20 | { |
21 | if (((unsigned long)dst & 3) != ((unsigned long)src & 3)) |
22 | goto bytecopy; |
23 | while ((unsigned long)dst & 3) { |
24 | writeb(val: *(char *)src, addr: dst++); |
25 | src++; |
26 | count--; |
27 | } |
28 | while (count > 3) { |
29 | __raw_writel(val: *(u32 *)src, addr: dst); |
30 | src += 4; |
31 | dst += 4; |
32 | count -= 4; |
33 | } |
34 | bytecopy: |
35 | while (count--) { |
36 | writeb(val: *(char *)src, addr: dst++); |
37 | src++; |
38 | } |
39 | } |
40 | |
41 | /* |
42 | ** Copies a block of memory from a device in an efficient manner. |
43 | ** Assumes the device can cope with 32-bit transfers. If it can't, |
44 | ** don't use this function. |
45 | ** |
46 | ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM: |
47 | ** 27341/64 = 427 cyc per int |
48 | ** 61311/128 = 478 cyc per short |
49 | ** 122637/256 = 479 cyc per byte |
50 | ** Ergo bus latencies dominant (not transfer size). |
51 | ** Minimize total number of transfers at cost of CPU cycles. |
52 | ** TODO: only look at src alignment and adjust the stores to dest. |
53 | */ |
54 | void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) |
55 | { |
56 | /* first compare alignment of src/dst */ |
57 | if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) ) |
58 | goto bytecopy; |
59 | |
60 | if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) ) |
61 | goto shortcopy; |
62 | |
63 | /* Then check for misaligned start address */ |
64 | if ((unsigned long)src & 1) { |
65 | *(u8 *)dst = readb(addr: src); |
66 | src++; |
67 | dst++; |
68 | count--; |
69 | if (count < 2) goto bytecopy; |
70 | } |
71 | |
72 | if ((unsigned long)src & 2) { |
73 | *(u16 *)dst = __raw_readw(addr: src); |
74 | src += 2; |
75 | dst += 2; |
76 | count -= 2; |
77 | } |
78 | |
79 | while (count > 3) { |
80 | *(u32 *)dst = __raw_readl(addr: src); |
81 | dst += 4; |
82 | src += 4; |
83 | count -= 4; |
84 | } |
85 | |
86 | shortcopy: |
87 | while (count > 1) { |
88 | *(u16 *)dst = __raw_readw(addr: src); |
89 | src += 2; |
90 | dst += 2; |
91 | count -= 2; |
92 | } |
93 | |
94 | bytecopy: |
95 | while (count--) { |
96 | *(char *)dst = readb(addr: src); |
97 | src++; |
98 | dst++; |
99 | } |
100 | } |
101 | |
102 | /* Sets a block of memory on a device to a given value. |
103 | * Assumes the device can cope with 32-bit transfers. If it can't, |
104 | * don't use this function. |
105 | */ |
106 | void memset_io(volatile void __iomem *addr, unsigned char val, int count) |
107 | { |
108 | u32 val32 = (val << 24) | (val << 16) | (val << 8) | val; |
109 | while ((unsigned long)addr & 3) { |
110 | writeb(val, addr: addr++); |
111 | count--; |
112 | } |
113 | while (count > 3) { |
114 | __raw_writel(val: val32, addr); |
115 | addr += 4; |
116 | count -= 4; |
117 | } |
118 | while (count--) { |
119 | writeb(val, addr: addr++); |
120 | } |
121 | } |
122 | |
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at DST.
 *
 * Bytes are stored one at a time until DST is 32-bit aligned; after
 * that, four port reads are packed into one 32-bit store (first byte
 * read goes into the most significant position), and any remainder
 * is stored byte by byte again.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;

	/* Unaligned head */
	for (; (unsigned long)out & 0x3; out++) {
		if (count == 0)
			return;
		count--;
		*out = inb(port);
	}

	/* Aligned body: one store per four port reads */
	for (; count >= 4; count -= 4, out += 4) {
		unsigned int word;

		/* Separate statements keep the port reads in order. */
		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *)out = word;
	}

	/* Tail */
	for (; count; count--, out++)
		*out = inb(port);
}
158 | |
159 | |
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  This is used by the IDE driver to read disk sectors.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the inw() breaks things.
 *
 * Every value read is passed through cpu_to_le16() before being
 * stored.  Three layouts are handled, chosen by the low bits of DST:
 *   - odd address:     16-bit stores, each built from the low byte of
 *                      the previous word and the high byte of the next
 *   - 16-bit aligned:  one leading 16-bit store, then as below
 *   - 32-bit aligned:  two words packed per 32-bit store, plus one
 *                      trailing 16-bit store if COUNT is odd
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	if ((unsigned long)out & 0x1) {
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;

		/* One word is already in flight, COUNT-1 remain. */
		for (count--; count; count--) {
			next = cpu_to_le16(inw(port));
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}

		*out = cur & 0xff;
		return;
	}

	/* Buffer only 16-bit aligned: one word brings it to 32 bits */
	if ((unsigned long)out & 0x2) {
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
	}

	/* Buffer 32-bit aligned: two port reads per store */
	while (count >= 2) {
		unsigned int pair;

		/* Separate statements keep the port reads in order. */
		pair = cpu_to_le16(inw(port)) << 16;
		pair |= cpu_to_le16(inw(port));
		*(unsigned int *)out = pair;
		out += 4;
		count -= 2;
	}

	if (count)
		*(unsigned short *)out = cpu_to_le16(inw(port));
}
230 | |
231 | |
232 | |
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST.  Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * Every value read is passed through cpu_to_le32().  For an unaligned
 * DST the leading part of the first word is stored with narrow
 * accesses, each following 32-bit store combines the leftover of the
 * previous word with the start of the next, and the leftover of the
 * last word is stored with narrow accesses again.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;
	unsigned long left;

	if (count == 0)
		return;

	switch ((unsigned long)dst & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (left = count; left; left--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)out = cur >> 16;
		out += 2;

		for (left = count - 1; left; left--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffff) << 16 | (next >> 16);
			out += 4;
			cur = next;
		}

		*(unsigned short *)out = cur & 0xffff;
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		cur = cpu_to_le32(inl(port));
		/* 1 byte + 2 bytes bring the pointer to 32-bit alignment */
		*out = cur >> 24;
		out++;
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;

		for (left = count - 1; left; left--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xff) << 24 | (next >> 8);
			out += 4;
			cur = next;
		}

		*out = cur & 0xff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		cur = cpu_to_le32(inl(port));
		/* a single byte brings the pointer to 32-bit alignment */
		*out = cur >> 24;
		out++;

		for (left = count - 1; left; left--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffffff) << 8 | next >> 24;
			out += 4;
			cur = next;
		}

		/* three bytes of the last word are still pending */
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;
		*out = cur & 0xff;
		break;
	}
}
311 | |
312 | |
/*
 * Like insb but in the opposite direction: write COUNT 8-bit bytes
 * from memory starting at SRC to port PORT, one outb() per byte.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *p = src;

	while (count) {
		count--;
		outb(*p, port);
		p++;
	}
}
330 | |
/*
 * Like insw but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Performance is important, but the
 * interfaces seems to be slow: just using the inlined version of the
 * outw() breaks things.
 *
 * Writes COUNT 16-bit words from memory starting at SRC to port PORT.
 * Every value is passed through le16_to_cpu() before outw().  SRC may
 * have any alignment; the memory access pattern is chosen from its
 * low two address bits.  The merge arithmetic treats the first byte
 * of a multi-byte load as the most significant one.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count >= 2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			/* upper half of the loaded word goes out first */
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		/* one word brings the pointer to 32-bit alignment */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count >= 2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* l carries the pending high byte of the next word */
		l = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* low byte of this load is the next high byte */
			l = (l2 & 0xff) << 8;
		}
		/*
		 * The last word is the pending high byte plus one final
		 * byte from memory; that byte is the low half as loaded
		 * and must not be shifted away.
		 */
		l2 = *p;
		outw(le16_to_cpu(l | l2), port);
		break;
	}
}
401 | |
402 | |
/*
 * Like insl but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Works with any alignment in SRC.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Writes COUNT 32-bit words from memory starting at SRC to port PORT.
 * Every value is passed through le32_to_cpu() before outl().  For an
 * unaligned SRC the start of the first word is fetched with narrow
 * loads, each following aligned 32-bit load completes the pending
 * word and starts the next, and the end of the last word is fetched
 * with narrow loads again.  The merge arithmetic treats the first
 * byte of a multi-byte load as the most significant one.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* l holds the pending upper 16 bits in its low half */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* 1 byte + 2 bytes: l holds the pending upper 24 bits */
		l = (unsigned int)*p << 24;
		p++;
		l |= (unsigned int)*(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;

	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* l holds the pending top byte */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * Three bytes complete the last word.  The 16-bit load
		 * fills bits 23..8 (directly below the pending top byte),
		 * the final byte fills bits 7..0.
		 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
483 | |
/*
 * Export the six port string-I/O routines.  memcpy_toio(),
 * memcpy_fromio() and memset_io() are not exported here.
 */
484 | EXPORT_SYMBOL(insb); |
485 | EXPORT_SYMBOL(insw); |
486 | EXPORT_SYMBOL(insl); |
487 | EXPORT_SYMBOL(outsb); |
488 | EXPORT_SYMBOL(outsw); |
489 | EXPORT_SYMBOL(outsl); |
490 | |