| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * Glue Code for 3-way parallel assembler optimized version of Twofish |
| 4 | * |
| 5 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
| 6 | */ |
| 7 | |
| 8 | #include <asm/cpu_device_id.h> |
| 9 | #include <crypto/algapi.h> |
| 10 | #include <crypto/twofish.h> |
| 11 | #include <linux/crypto.h> |
| 12 | #include <linux/export.h> |
| 13 | #include <linux/init.h> |
| 14 | #include <linux/module.h> |
| 15 | #include <linux/types.h> |
| 16 | |
| 17 | #include "twofish.h" |
| 18 | #include "ecb_cbc_helpers.h" |
| 19 | |
/*
 * Make the 3-way block primitives available to other GPL modules. Neither
 * function is defined in this file; they are presumably implemented in the
 * accompanying assembler source and declared in "twofish.h".
 */
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
| 22 | |
| 23 | static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, |
| 24 | const u8 *key, unsigned int keylen) |
| 25 | { |
| 26 | return twofish_setkey(tfm: &tfm->base, key, key_len: keylen); |
| 27 | } |
| 28 | |
| 29 | static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) |
| 30 | { |
| 31 | __twofish_enc_blk_3way(ctx, dst, src, false); |
| 32 | } |
| 33 | |
| 34 | void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) |
| 35 | { |
| 36 | u8 buf[2][TF_BLOCK_SIZE]; |
| 37 | const u8 *s = src; |
| 38 | |
| 39 | if (dst == src) |
| 40 | s = memcpy(buf, src, sizeof(buf)); |
| 41 | twofish_dec_blk_3way(ctx, dst, src); |
| 42 | crypto_xor(dst: dst + TF_BLOCK_SIZE, src: s, size: sizeof(buf)); |
| 43 | |
| 44 | } |
| 45 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); |
| 46 | |
| 47 | static int ecb_encrypt(struct skcipher_request *req) |
| 48 | { |
| 49 | ECB_WALK_START(req, TF_BLOCK_SIZE, -1); |
| 50 | ECB_BLOCK(3, twofish_enc_blk_3way); |
| 51 | ECB_BLOCK(1, twofish_enc_blk); |
| 52 | ECB_WALK_END(); |
| 53 | } |
| 54 | |
| 55 | static int ecb_decrypt(struct skcipher_request *req) |
| 56 | { |
| 57 | ECB_WALK_START(req, TF_BLOCK_SIZE, -1); |
| 58 | ECB_BLOCK(3, twofish_dec_blk_3way); |
| 59 | ECB_BLOCK(1, twofish_dec_blk); |
| 60 | ECB_WALK_END(); |
| 61 | } |
| 62 | |
| 63 | static int cbc_encrypt(struct skcipher_request *req) |
| 64 | { |
| 65 | CBC_WALK_START(req, TF_BLOCK_SIZE, -1); |
| 66 | CBC_ENC_BLOCK(twofish_enc_blk); |
| 67 | CBC_WALK_END(); |
| 68 | } |
| 69 | |
| 70 | static int cbc_decrypt(struct skcipher_request *req) |
| 71 | { |
| 72 | CBC_WALK_START(req, TF_BLOCK_SIZE, -1); |
| 73 | CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); |
| 74 | CBC_DEC_BLOCK(1, twofish_dec_blk); |
| 75 | CBC_WALK_END(); |
| 76 | } |
| 77 | |
| 78 | static struct skcipher_alg tf_skciphers[] = { |
| 79 | { |
| 80 | .base.cra_name = "ecb(twofish)" , |
| 81 | .base.cra_driver_name = "ecb-twofish-3way" , |
| 82 | .base.cra_priority = 300, |
| 83 | .base.cra_blocksize = TF_BLOCK_SIZE, |
| 84 | .base.cra_ctxsize = sizeof(struct twofish_ctx), |
| 85 | .base.cra_module = THIS_MODULE, |
| 86 | .min_keysize = TF_MIN_KEY_SIZE, |
| 87 | .max_keysize = TF_MAX_KEY_SIZE, |
| 88 | .setkey = twofish_setkey_skcipher, |
| 89 | .encrypt = ecb_encrypt, |
| 90 | .decrypt = ecb_decrypt, |
| 91 | }, { |
| 92 | .base.cra_name = "cbc(twofish)" , |
| 93 | .base.cra_driver_name = "cbc-twofish-3way" , |
| 94 | .base.cra_priority = 300, |
| 95 | .base.cra_blocksize = TF_BLOCK_SIZE, |
| 96 | .base.cra_ctxsize = sizeof(struct twofish_ctx), |
| 97 | .base.cra_module = THIS_MODULE, |
| 98 | .min_keysize = TF_MIN_KEY_SIZE, |
| 99 | .max_keysize = TF_MAX_KEY_SIZE, |
| 100 | .ivsize = TF_BLOCK_SIZE, |
| 101 | .setkey = twofish_setkey_skcipher, |
| 102 | .encrypt = cbc_encrypt, |
| 103 | .decrypt = cbc_decrypt, |
| 104 | }, |
| 105 | }; |
| 106 | |
| 107 | static bool is_blacklisted_cpu(void) |
| 108 | { |
| 109 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
| 110 | return false; |
| 111 | |
| 112 | switch (boot_cpu_data.x86_vfm) { |
| 113 | case INTEL_ATOM_BONNELL: |
| 114 | case INTEL_ATOM_BONNELL_MID: |
| 115 | case INTEL_ATOM_SALTWELL: |
| 116 | /* |
| 117 | * On Atom, twofish-3way is slower than original assembler |
| 118 | * implementation. Twofish-3way trades off some performance in |
| 119 | * storing blocks in 64bit registers to allow three blocks to |
| 120 | * be processed parallel. Parallel operation then allows gaining |
| 121 | * more performance than was trade off, on out-of-order CPUs. |
| 122 | * However Atom does not benefit from this parallelism and |
| 123 | * should be blacklisted. |
| 124 | */ |
| 125 | return true; |
| 126 | } |
| 127 | |
| 128 | if (boot_cpu_data.x86 == 0x0f) { |
| 129 | /* |
| 130 | * On Pentium 4, twofish-3way is slower than original assembler |
| 131 | * implementation because excessive uses of 64bit rotate and |
| 132 | * left-shifts (which are really slow on P4) needed to store and |
| 133 | * handle 128bit block in two 64bit registers. |
| 134 | */ |
| 135 | return true; |
| 136 | } |
| 137 | |
| 138 | return false; |
| 139 | } |
| 140 | |
| 141 | static int force; |
| 142 | module_param(force, int, 0); |
| 143 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist" ); |
| 144 | |
| 145 | static int __init twofish_3way_init(void) |
| 146 | { |
| 147 | if (!force && is_blacklisted_cpu()) { |
| 148 | printk(KERN_INFO |
| 149 | "twofish-x86_64-3way: performance on this CPU " |
| 150 | "would be suboptimal: disabling " |
| 151 | "twofish-x86_64-3way.\n" ); |
| 152 | return -ENODEV; |
| 153 | } |
| 154 | |
| 155 | return crypto_register_skciphers(algs: tf_skciphers, |
| 156 | ARRAY_SIZE(tf_skciphers)); |
| 157 | } |
| 158 | |
| 159 | static void __exit twofish_3way_fini(void) |
| 160 | { |
| 161 | crypto_unregister_skciphers(algs: tf_skciphers, ARRAY_SIZE(tf_skciphers)); |
| 162 | } |
| 163 | |
| 164 | module_init(twofish_3way_init); |
| 165 | module_exit(twofish_3way_fini); |
| 166 | |
| 167 | MODULE_LICENSE("GPL" ); |
| 168 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized" ); |
| 169 | MODULE_ALIAS_CRYPTO("twofish" ); |
| 170 | MODULE_ALIAS_CRYPTO("twofish-asm" ); |
| 171 | |