| 1 | /* Optimized strncmp implementation for PowerPC476. |
| 2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library. If not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #include <sysdep.h> |
| 20 | |
| 21 | /* strncmp |
| 22 | |
| 23 | Register Use |
| 24 | r0:temp return equality |
| 25 | r3:source1 address, return equality |
| 26 | r4:source2 address |
| 27 | r5:byte count |
| 28 | |
| 29 | Implementation description |
| 30 | Limit the word loop to the 8-byte groups that lie before the next 4 KB |
| 31 | boundary of either source and that fit within the byte count. |
| 32 | Check 2 words from src1 and src2. If unequal jump to end and |
| 33 | return src1 > src2 or src1 < src2. |
| 34 | If null check bytes before null and then jump to end and |
| 35 | return src1 > src2, src1 < src2 or src1 = src2. |
| 36 | If count = zero check bytes before zero counter and then jump to end and |
| 37 | return src1 > src2, src1 < src2 or src1 = src2. |
| 38 | If src1 = src2 and no null, repeat. */ |
| 39 | |
| 40 | EALIGN (strncmp,5,0) /* strncmp entry: r3 = src1, r4 = src2, r5 = byte count; result is returned in r3. EALIGN is a macro defined outside this file (presumably via sysdep.h). */ |
| 41 | neg r7,r3 /* r7 = -src1 */ |
| 42 | clrlwi r7,r7,20 /* keep the low 12 bits: bytes from src1 up to the next 4 KB boundary (0 if src1 is on one) */ |
| 43 | neg r8,r4 /* r8 = -src2 */ |
| 44 | clrlwi r8,r8,20 /* bytes from src2 up to the next 4 KB boundary */ |
| 45 | srwi. r7,r7,3 /* r7 = whole 8-byte groups before src1's boundary; record form sets CR0 */ |
| 46 | beq L(prebyte_count_loop) /* no full group: compare byte by byte */ |
| 47 | srwi. r8,r8,3 /* r8 = whole 8-byte groups before src2's boundary */ |
| 48 | beq L(prebyte_count_loop) /* no full group: compare byte by byte */ |
| 49 | cmplw r7,r8 /* unsigned compare of the two group counts */ |
| 50 | mtctr r7 /* CTR = src1 group count; CR0 from the cmplw is still valid */ |
| 51 | ble L(preword2_count_loop) /* r7 <= r8: CTR already holds the minimum */ |
| 52 | mtctr r8 /* otherwise CTR = src2 group count */ |
| 53 | /* Clamp the loop count to the number of 8-byte groups the byte count allows. */ |
| 54 | L(preword2_count_loop): |
| 55 | srwi. r6,r5,3 /* r6 = count / 8 */ |
| 56 | beq L(prebyte_count_loop) /* fewer than 8 bytes requested */ |
| 57 | mfctr r7 /* r7 = min(src1 groups, src2 groups) */ |
| 58 | cmplw r6,r7 /* unsigned: count groups vs. boundary-limited groups */ |
| 59 | bgt L(set_count_loop) /* count covers more groups than fit before the boundary */ |
| 60 | mtctr r6 /* count is the tighter limit: loop count / 8 times */ |
| 61 | clrlwi r5,r5,29 /* r5 = count % 8, left over for the byte loop */ |
| 62 | /* Main loop: compare 8 bytes per iteration, at most CTR iterations. */ |
| 63 | L(word2_count_loop): |
| 64 | lwz r10,0(r3) /* r10 = first word of src1 */ |
| 65 | lwz r6,4(r3) /* r6 = second word of src1 */ |
| 66 | addi r3,r3,0x08 /* advance src1 by 8 */ |
| 67 | lwz r8,0(r4) /* r8 = first word of src2 */ |
| 68 | lwz r9,4(r4) /* r9 = second word of src2 */ |
| 69 | addi r4,r4,0x08 /* advance src2 by 8 */ |
| 70 | dlmzb. r12,r10,r6 /* r12 = 1-based index of the leftmost zero byte in r10:r6 (8 if none); CR0 EQ is set only when no zero byte was found */ |
| 71 | bne L(end_check) /* src1 has a null within these 8 bytes */ |
| 72 | cmplw r10,r8 /* compare first words (unsigned) */ |
| 73 | bne L(st1) /* first words differ */ |
| 74 | cmplw r6,r9 /* compare second words (unsigned) */ |
| 75 | bne L(st1) /* second words differ */ |
| 76 | bdnz L(word2_count_loop) /* decrement CTR and loop while it is nonzero */ |
| 77 | /* Byte loop setup: r5 holds the number of bytes still to compare (possibly 0). */ |
| 78 | L(prebyte_count_loop): |
| 79 | addi r5,r5,1 /* bias by one for the bdz below */ |
| 80 | mtctr r5 /* CTR = remaining bytes + 1 */ |
| 81 | bdz L(end_strncmp) /* decrement leaves CTR = remaining bytes; if that is 0, return equal */ |
| 82 | /* Compare one byte per iteration, at most CTR bytes. */ |
| 83 | L(byte_count_loop): |
| 84 | lbz r6,0(r3) /* r6 = next byte of src1 */ |
| 85 | addi r3,r3,1 |
| 86 | lbz r7,0(r4) /* r7 = next byte of src2 */ |
| 87 | addi r4,r4,1 |
| 88 | cmplw r6,r7 /* unsigned byte compare */ |
| 89 | bne L(st1) /* bytes differ */ |
| 90 | cmpwi r6,0 /* bytes are equal here; is it the terminator? */ |
| 91 | beq L(end_strncmp) /* both strings ended: equal */ |
| 92 | bdnz L(byte_count_loop) /* loop while bytes remain */ |
| 93 | b L(end_strncmp) /* count exhausted without a difference */ |
| 94 | /* Count exceeds the boundary-limited groups: CTR keeps that limit and its bytes are deducted from r5. */ |
| 95 | L(set_count_loop): |
| 96 | slwi r7,r7,3 /* r7 = groups * 8 = bytes the word loop may consume */ |
| 97 | subf r5,r7,r5 /* r5 = count - r7, left for the byte loop */ |
| 98 | b L(word2_count_loop) |
| 99 | /* A null was found in src1's 8 bytes; CR0 from dlmzb. is still live on entry. */ |
| 100 | L(end_check): |
| 101 | subfic r12,r12,4 /* r12 = 4 - index; does not write CR0 */ |
| 102 | blt L(end_check2) /* LT from dlmzb.: the null is in the second word */ |
| 103 | rlwinm r12,r12,3,0,31 /* rotate left 3 with full mask: r12 * 8 = bits that follow the null in the first word */ |
| 104 | srw r10,r10,r12 /* discard src1 bytes after the null */ |
| 105 | srw r8,r8,r12 /* discard the same byte positions of src2 */ |
| 106 | cmplw r10,r8 /* compare bytes up to and including the null */ |
| 107 | bne L(st1) |
| 108 | b L(end_strncmp) /* equal through the terminator */ |
| 109 | /* Null is in the second word: compare the first words whole, then the second words up to the null. */ |
| 110 | L(end_check2): |
| 111 | addi r12,r12,4 /* r12 = 8 - index: bytes after the null in the second word */ |
| 112 | cmplw r10,r8 /* compare first words (unsigned) */ |
| 113 | rlwinm r12,r12,3,0,31 /* bytes -> bits; not a record form, so CR0 from the cmplw survives */ |
| 114 | bne L(st1) /* first words differ */ |
| 115 | srw r6,r6,r12 /* discard src1 bytes after the null */ |
| 116 | srw r9,r9,r12 /* discard the same byte positions of src2 */ |
| 117 | cmplw r6,r9 /* compare second words up to and including the null */ |
| 118 | bne L(st1) |
| 119 | /* Equal result; end_check2 falls through to here. */ |
| 120 | L(end_strncmp): |
| 121 | addi r3,r0,0 /* r3 = 0: an RA operand of 0 in addi means the constant zero, not GPR0 (assumes r0 expands to register number 0) */ |
| 122 | blr |
| 123 | /* Difference found: CR0 holds the outcome of the last cmplw. */ |
| 124 | L(st1): |
| 125 | mfcr r3 /* return the condition register: CR0 is its top four bits, so LT sets the sign bit (negative) and GT yields a positive value */ |
| 126 | blr |
| 127 | END (strncmp) /* END is a macro defined outside this file; presumably closes the function symbol */ |
| 128 | libc_hidden_builtin_def (strncmp) /* macro defined outside this file; presumably sets up glibc's internal hidden alias */ |
| 129 | |