| 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
| 2 | /* |
| 3 | * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions |
| 4 | * |
| 5 | * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> |
| 6 | */ |
| 7 | |
| 8 | #include <linux/linkage.h> |
| 9 | #include <linux/cfi_types.h> |
| 10 | #include <asm/assembler.h> |
| 11 | |
| 12 | .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 /* one iteration per register number 0..12 */ |
| 13 | .set .Lv\b\().4s, \b /* symbol .Lv<b>.4s evaluates to b; the .inst macros in this file prepend .L to an operand written vN.4s to obtain N, so only v0-v12 can be passed to them */ |
| 14 | .endr |
| 15 | |
| 16 | .macro sm3partw1, rd, rn, rm /* emit SM3PARTW1 rd, rn, rm as a raw instruction word; operands must be written vN.4s with N in 0..12. NOTE(review): presumably hand-encoded so assemblers without the SM3 mnemonics can build this - confirm */ |
| 17 | .inst 0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16) /* base opcode with the rd number in bits [4:0], rn in [9:5], rm in [20:16] */ |
| 18 | .endm |
| 19 | |
| 20 | .macro sm3partw2, rd, rn, rm /* emit SM3PARTW2 rd, rn, rm as a raw instruction word; operands must be written vN.4s with N in 0..12 */ |
| 21 | .inst 0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16) /* same field layout as sm3partw1 (rd [4:0], rn [9:5], rm [20:16]); only the base opcode differs */ |
| 22 | .endm |
| 23 | |
| 24 | .macro sm3ss1, rd, rn, rm, ra /* emit the four-register SM3SS1 rd, rn, rm, ra as a raw instruction word; operands must be written vN.4s with N in 0..12 */ |
| 25 | .inst 0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) /* rd number in bits [4:0], rn in [9:5], ra in [14:10], rm in [20:16] */ |
| 26 | .endm |
| 27 | |
| 28 | .macro sm3tt1a, rd, rn, rm, imm2 /* emit SM3TT1A as a raw instruction word; rd, rn, rm are vN.4s operands (N in 0..12), imm2 is a 2-bit immediate that per the Arm ARM selects the 32-bit lane of rm */ |
| 29 | .inst 0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) /* rd number in bits [4:0], rn in [9:5], imm2 in [13:12], rm in [20:16] */ |
| 30 | .endm |
| 31 | |
| 32 | .macro sm3tt1b, rd, rn, rm, imm2 /* emit SM3TT1B as a raw instruction word; same operands as sm3tt1a (vN.4s registers with N in 0..12 plus a 2-bit lane immediate) */ |
| 33 | .inst 0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) /* rd number in bits [4:0], rn in [9:5], imm2 in [13:12], rm in [20:16] */ |
| 34 | .endm |
| 35 | |
| 36 | .macro sm3tt2a, rd, rn, rm, imm2 /* emit SM3TT2A as a raw instruction word; same operands as sm3tt1a (vN.4s registers with N in 0..12 plus a 2-bit lane immediate) */ |
| 37 | .inst 0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) /* rd number in bits [4:0], rn in [9:5], imm2 in [13:12], rm in [20:16] */ |
| 38 | .endm |
| 39 | |
| 40 | .macro sm3tt2b, rd, rn, rm, imm2 /* emit SM3TT2B as a raw instruction word; same operands as sm3tt1a (vN.4s registers with N in 0..12 plus a 2-bit lane immediate) */ |
| 41 | .inst 0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) /* rd number in bits [4:0], rn in [9:5], imm2 in [13:12], rm in [20:16] */ |
| 42 | .endm |
| 43 | |
| 44 | .macro round, ab, s0, t0, t1, i /* one round on the state held in v8/v9. ab = a or b, appended to sm3tt1/sm3tt2 to pick the variant; s0 = message vector register (name without arrangement suffix); t0 = register holding this round's constant; t1 = register that receives the constant for the following round; i = lane index 0..3. Clobbers v5 and t1 */ |
| 45 | sm3ss1 v5.4s, v8.4s, \t0\().4s, v9.4s /* v5 = SM3SS1 result from state half v8, constant vector t0 and state half v9 */ |
| 46 | shl \t1\().4s, \t0\().4s, #1 /* t1 = t0 shifted left by one bit in every 32-bit lane */ |
| 47 | sri \t1\().4s, \t0\().4s, #31 /* insert t0 >> 31 into bit 0 of each lane: t1 is now t0 rotated left by one bit per lane */ |
| 48 | sm3tt1\ab v8.4s, v5.4s, v10.4s, \i /* update v8 from v5 and lane i of v10 (v10 = s0 ^ s1, prepared by qround) */ |
| 49 | sm3tt2\ab v9.4s, v5.4s, \s0\().4s, \i /* update v9 from v5 and lane i of s0 */ |
| 50 | .endm |
| 51 | |
| 52 | .macro qround, ab, s0, s1, s2, s3, s4 /* four rounds (lanes 0..3) on message vector s0; ab = a or b is passed through to round. If s4 is supplied, the next message vector is also derived into s4 from s0..s3; s2, s3 and s4 may be left blank. Expects the round constant in v11 and leaves the constant for the next qround in v11. Clobbers v5, v10, v11, v12, plus v6, v7 and s4 when s4 is given */ |
| 53 | .ifnb \s4 /* message expansion only when a destination register was passed */ |
| 54 | ext \s4\().16b, \s1\().16b, \s2\().16b, #12 /* s4 = top 4 bytes of s1 followed by low 12 bytes of s2 */ |
| 55 | ext v6.16b, \s0\().16b, \s1\().16b, #12 /* v6 = top 4 bytes of s0 followed by low 12 bytes of s1 */ |
| 56 | ext v7.16b, \s2\().16b, \s3\().16b, #8 /* v7 = high 8 bytes of s2 followed by low 8 bytes of s3 */ |
| 57 | sm3partw1 \s4\().4s, \s0\().4s, \s3\().4s /* first half of the expansion, accumulated into s4 */ |
| 58 | .endif |
| 59 | |
| 60 | eor v10.16b, \s0\().16b, \s1\().16b /* v10 = s0 ^ s1, the operand round passes to sm3tt1 */ |
| 61 | |
| 62 | round \ab, \s0, v11, v12, 0 /* lane 0: constant read from v11, rotated copy written to v12 */ |
| 63 | round \ab, \s0, v12, v11, 1 /* lane 1: constant read from v12, rotated copy written to v11 */ |
| 64 | round \ab, \s0, v11, v12, 2 /* lane 2: v11 -> v12 again */ |
| 65 | round \ab, \s0, v12, v11, 3 /* lane 3: v12 -> v11, so v11 ends up holding the constant rotated left by 4 bits in total */ |
| 66 | |
| 67 | .ifnb \s4 |
| 68 | sm3partw2 \s4\().4s, v7.4s, v6.4s /* second half of the expansion: completes s4 using the v7 and v6 vectors built above */ |
| 69 | .endif |
| 70 | .endm |
| 71 | |
| 72 | /* |
| 73 | * void sm3_ce_transform(struct sm3_state *sst, u8 const *src, |
| 74 | * int blocks) |
| | * |
| | * Registers as used by the code below: x0 = sst (32 bytes of state are |
| | * read from it and written back to it), x1 = src (advanced by 64 bytes |
| | * per block), w2 = blocks. |
| | * |
| | * The block count is decremented before it is first tested, so the loop |
| | * body always runs at least once; blocks == 0 is not handled and the |
| | * caller must pass blocks >= 1. |
| | * |
| | * Overwrites x1, w2, x8 and v0-v16 without saving them. |
| | * NOTE(review): presumably the caller must already own the NEON unit |
| | * (e.g. via kernel_neon_begin) - confirm against the C glue code. |
| 75 | */ |
| 76 | .text |
| 77 | SYM_TYPED_FUNC_START(sm3_ce_transform) |
| 78 | /* load state */ |
| 79 | ld1 {v8.4s-v9.4s}, [x0] /* v8, v9 = the 32 bytes at sst */ |
| 80 | rev64 v8.4s, v8.4s /* swap the two 32-bit words inside each 64-bit half ... */ |
| 81 | rev64 v9.4s, v9.4s |
| 82 | ext v8.16b, v8.16b, v8.16b, #8 /* ... then swap the halves: the four words of each vector are now in reverse order */ |
| 83 | ext v9.16b, v9.16b, v9.16b, #8 |
| 84 | |
| 85 | adr_l x8, .Lt /* x8 = address of the two-word constant table .Lt */ |
| 86 | ldp s13, s14, [x8] /* lane 0 of v13 = first constant, lane 0 of v14 = second constant; scalar loads zero the remaining lanes */ |
| 87 | |
| 88 | /* load input */ |
| 89 | 0: ld1 {v0.16b-v3.16b}, [x1], #64 /* v0..v3 = next 64-byte block; src pointer advances by 64 */ |
| 90 | sub w2, w2, #1 /* one block fewer remaining; tested by the cbnz at the end of the loop */ |
| 91 | |
| 92 | mov v15.16b, v8.16b /* keep the incoming state so it can be XORed into the result after the rounds */ |
| 93 | mov v16.16b, v9.16b |
| 94 | |
| 95 | CPU_LE( rev32 v0.16b, v0.16b ) /* reverse the bytes of every 32-bit word; NOTE(review): CPU_LE comes from asm/assembler.h and presumably emits this only on little-endian builds - confirm */ |
| 96 | CPU_LE( rev32 v1.16b, v1.16b ) |
| 97 | CPU_LE( rev32 v2.16b, v2.16b ) |
| 98 | CPU_LE( rev32 v3.16b, v3.16b ) |
| 99 | |
| 100 | ext v11.16b, v13.16b, v13.16b, #4 /* v11 = v13 rotated by 4 bytes: the first constant moves from lane 0 to lane 3, other lanes are zero */ |
| 101 | |
| 102 | qround a, v0, v1, v2, v3, v4 /* rounds 0-15: four qrounds with the a variants, each also producing the next message vector in its last operand */ |
| 103 | qround a, v1, v2, v3, v4, v0 |
| 104 | qround a, v2, v3, v4, v0, v1 |
| 105 | qround a, v3, v4, v0, v1, v2 |
| 106 | |
| 107 | ext v11.16b, v14.16b, v14.16b, #4 /* restart the constant sequence from the second constant, again placed in lane 3 */ |
| 108 | |
| 109 | qround b, v4, v0, v1, v2, v3 /* rounds 16-63: twelve qrounds with the b variants; v0-v4 are reused cyclically as message vectors */ |
| 110 | qround b, v0, v1, v2, v3, v4 |
| 111 | qround b, v1, v2, v3, v4, v0 |
| 112 | qround b, v2, v3, v4, v0, v1 |
| 113 | qround b, v3, v4, v0, v1, v2 |
| 114 | qround b, v4, v0, v1, v2, v3 |
| 115 | qround b, v0, v1, v2, v3, v4 |
| 116 | qround b, v1, v2, v3, v4, v0 |
| 117 | qround b, v2, v3, v4, v0, v1 |
| 118 | qround b, v3, v4 /* last three qrounds omit the trailing operands, so no further message vectors are generated */ |
| 119 | qround b, v4, v0 |
| 120 | qround b, v0, v1 |
| 121 | |
| 122 | eor v8.16b, v8.16b, v15.16b /* new state = round output XOR the state saved at the top of the loop */ |
| 123 | eor v9.16b, v9.16b, v16.16b |
| 124 | |
| 125 | /* handled all input blocks? */ |
| 126 | cbnz w2, 0b /* loop back to label 0 while the remaining block count is non-zero */ |
| 127 | |
| 128 | /* save state */ |
| 129 | rev64 v8.4s, v8.4s /* undo the word reversal applied when the state was loaded */ |
| 130 | rev64 v9.4s, v9.4s |
| 131 | ext v8.16b, v8.16b, v8.16b, #8 |
| 132 | ext v9.16b, v9.16b, v9.16b, #8 |
| 133 | st1 {v8.4s-v9.4s}, [x0] /* write the 32-byte state back to sst */ |
| 134 | ret |
| 135 | SYM_FUNC_END(sm3_ce_transform) |
| 136 | |
| 137 | .section ".rodata" , "a" |
| 138 | .align 3 /* NOTE(review): on arm64 gas .align takes a power of two, so this should be 8-byte alignment - confirm */ |
| 139 | .Lt: .word 0x79cc4519, 0x9d8a7a87 /* two 32-bit constants loaded as a pair by ldp s13, s14 in sm3_ce_transform: the first seeds the constant for rounds 0-15, the second for rounds 16-63 */ |
| 140 | |