3 The twofish block cipher.
5 Copyright (C) 2001, 2014 Niels Möller
6 Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
8 Modifications for lsh, integrated testing
9 Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
11 This file is part of GNU Nettle.
13 GNU Nettle is free software: you can redistribute it and/or
14 modify it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free
23 Software Foundation; either version 2 of the License, or (at your
24 option) any later version.
26 or both in parallel, as here.
28 GNU Nettle is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received copies of the GNU General Public License and
34 the GNU Lesser General Public License along with this program. If
35 not, see http://www.gnu.org/licenses/.
49 /* Bitwise rotations on 32-bit words. These are defined as macros that
50 * evaluate their argument twice, so do not apply to any expressions with
54 #define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
55 #define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
56 #define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
57 #define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))
59 /* ------------------------------------------------------------------------- */
61 /* The permutations q0 and q1. These are fixed permutations on 8-bit values.
62 * The permutations have been computed using the program twofish-data,
63 * which is distributed along with this file.
66 static const uint8_t q0[256] = {
67 0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
68 0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
69 0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
70 0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
71 0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
72 0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
73 0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
74 0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
75 0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
76 0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
77 0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
78 0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
79 0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
80 0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
81 0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
82 0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
83 0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
84 0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
85 0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
86 0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
87 0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
88 0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
89 0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
90 0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
91 0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
92 0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
93 0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
94 0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
95 0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
96 0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
97 0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
98 0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
101 static const uint8_t q1[256] = {
102 0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
103 0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
104 0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
105 0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
106 0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
107 0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
108 0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
109 0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
110 0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
111 0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
112 0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
113 0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
114 0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
115 0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
116 0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
117 0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
118 0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
119 0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
120 0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
121 0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
122 0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
123 0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
124 0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
125 0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
126 0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
127 0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
128 0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
129 0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
130 0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
131 0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
132 0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
133 0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
136 /* ------------------------------------------------------------------------- */
138 /* uint32_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
140 * Multiplication in GF(2^8). Larger return type, to avoid need for
141 * type casts when the return value is shifted left.
143 * This function multiplies a times b in the Galois Field GF(2^8) with
144 * primitive polynomial p.
145 * The representation of the polynomials a, b, and p uses bits with
146 * values 2^i to represent the terms x^i. The polynomial p contains an
149 * Note that addition and subtraction in GF(2^8) is simply the XOR
154 gf_multiply(uint8_t p, uint8_t a, uint8_t b)
160 if (a & 1) result ^= shift;
163 if (shift & 0x100) shift ^= p;
168 /* ------------------------------------------------------------------------- */
170 /* The matrix RS as specified in section 4.3 the twofish paper. */
172 static const uint8_t rs_matrix[4][8] = {
173 { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
174 { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
175 { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
176 { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
178 /* uint32_t compute_s(uint32_t m1, uint32_t m2);
180 * Computes the value RS * M, where M is a byte vector composed of the
181 * bytes of m1 and m2. Arithmetic is done in GF(2^8) with primitive
182 * polynomial x^8 + x^6 + x^3 + x^2 + 1.
184 * This function is used to compute the sub-keys S which are in turn used
185 * to generate the S-boxes.
189 compute_s(uint32_t m1, uint32_t m2)
193 for (i = 0; i < 4; i++)
194 s |= (( gf_multiply(0x4D, m1, rs_matrix[i][0])
195 ^ gf_multiply(0x4D, m1 >> 8, rs_matrix[i][1])
196 ^ gf_multiply(0x4D, m1 >> 16, rs_matrix[i][2])
197 ^ gf_multiply(0x4D, m1 >> 24, rs_matrix[i][3])
198 ^ gf_multiply(0x4D, m2, rs_matrix[i][4])
199 ^ gf_multiply(0x4D, m2 >> 8, rs_matrix[i][5])
200 ^ gf_multiply(0x4D, m2 >> 16, rs_matrix[i][6])
201 ^ gf_multiply(0x4D, m2 >> 24, rs_matrix[i][7])) << (i*8));
205 /* ------------------------------------------------------------------------- */
207 /* This table describes which q S-boxes are used for each byte in each stage
208 * of the function h, cf. figure 2 of the twofish paper.
211 static const uint8_t * const q_table[4][5] =
212 { { q1, q1, q0, q0, q1 },
213 { q0, q1, q1, q0, q0 },
214 { q0, q0, q0, q1, q1 },
215 { q1, q0, q1, q1, q0 } };
217 /* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
219 static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
220 { 0x5B, 0xEF, 0xEF, 0x01 },
221 { 0xEF, 0x5B, 0x01, 0xEF },
222 { 0xEF, 0x01, 0xEF, 0x5B } };
224 /* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
226 * Perform the h function (section 4.3.2) on one byte. It consists of
227 * repeated applications of the q permutation, followed by a XOR with
228 * part of a sub-key. Finally, the value is multiplied by one column of
229 * the MDS matrix. To obtain the result for a full word, the results of
230 * h for the individual bytes are XORed.
232 * k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
233 * actual byte to apply the function to; l0, l1, l2, and l3 are the
234 * appropriate bytes from the subkey. Note that only l0..l(k-1) are used.
238 h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
240 uint8_t y = q_table[i][4][l0 ^
242 q_table[i][2][k == 2 ? x : l2 ^
243 q_table[i][1][k == 3 ? x : l3 ^ q_table[i][0][x]]]]];
245 return ( (gf_multiply(0x69, mds_matrix[0][i], y))
246 | (gf_multiply(0x69, mds_matrix[1][i], y) << 8)
247 | (gf_multiply(0x69, mds_matrix[2][i], y) << 16)
248 | (gf_multiply(0x69, mds_matrix[3][i], y) << 24) );
251 /* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
253 * Perform the function h on a word. See the description of h_byte() above.
257 h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
259 return ( h_byte(k, 0, x, l0, l1, l2, l3)
260 ^ h_byte(k, 1, x, l0 >> 8, l1 >> 8, l2 >> 8, l3 >> 8)
261 ^ h_byte(k, 2, x, l0 >> 16, l1 >> 16, l2 >> 16, l3 >> 16)
262 ^ h_byte(k, 3, x, l0 >> 24, l1 >> 24, l2 >> 24, l3 >> 24) );
266 /* ------------------------------------------------------------------------- */
270 /* Structure which contains the tables containing the subkeys and the
271 * key-dependent s-boxes.
275 /* Set up internal tables required for twofish encryption and decryption.
277 * The key size is specified in bytes. Key sizes up to 32 bytes are
278 * supported. Larger key sizes are silently truncated.
282 twofish_set_key(struct twofish_ctx *context,
283 size_t keysize, const uint8_t *key)
285 uint8_t key_copy[32];
286 uint32_t m[8], s[4], t;
289 /* Extend key as necessary */
291 assert(keysize <= 32);
293 /* We do a little more copying than necessary, but that doesn't
295 memset(key_copy, 0, 32);
296 memcpy(key_copy, key, keysize);
298 for (i = 0; i<8; i++)
299 m[i] = LE_READ_UINT32(key_copy + i*4);
303 else if (keysize <= 24)
308 /* Compute sub-keys */
310 for (i = 0; i < 20; i++)
312 t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
314 t += (context->keys[2*i] =
315 t + h(k, 2*i, m[0], m[2], m[4], m[6]));
317 context->keys[2*i+1] = t;
320 /* Compute key-dependent S-boxes */
322 for (i = 0; i < k; i++)
323 s[k-1-i] = compute_s(m[2*i], m[2*i+1]);
325 for (i = 0; i < 4; i++)
326 for (j = 0; j < 256; j++)
327 context->s_box[i][j] = h_byte(k, i, j,
335 twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
337 twofish_set_key (context, TWOFISH128_KEY_SIZE, key);
340 twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
342 twofish_set_key (context, TWOFISH192_KEY_SIZE, key);
345 twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
347 twofish_set_key (context, TWOFISH256_KEY_SIZE, key);
350 /* Encrypt blocks of 16 bytes of data with the twofish algorithm.
352 * Before this function can be used, twofish_set_key() must be used in order to
353 * set up various tables required for the encryption algorithm.
355 * This function always encrypts 16 bytes of plaintext to 16 bytes of
356 * ciphertext. The memory areas of the plaintext and the ciphertext can
361 twofish_encrypt(const struct twofish_ctx *context,
364 const uint8_t *plaintext)
366 const uint32_t * keys = context->keys;
367 const uint32_t (*s_box)[256] = context->s_box;
369 assert( !(length % TWOFISH_BLOCK_SIZE) );
370 for ( ; length; length -= TWOFISH_BLOCK_SIZE)
373 uint32_t r0, r1, r2, r3, t0, t1;
376 for (i = 0; i<4; i++, plaintext += 4)
377 words[i] = LE_READ_UINT32(plaintext);
379 r0 = words[0] ^ keys[0];
380 r1 = words[1] ^ keys[1];
381 r2 = words[2] ^ keys[2];
382 r3 = words[3] ^ keys[3];
384 for (i = 0; i < 8; i++) {
385 t1 = ( s_box[1][r1 & 0xFF]
386 ^ s_box[2][(r1 >> 8) & 0xFF]
387 ^ s_box[3][(r1 >> 16) & 0xFF]
388 ^ s_box[0][(r1 >> 24) & 0xFF]);
389 t0 = ( s_box[0][r0 & 0xFF]
390 ^ s_box[1][(r0 >> 8) & 0xFF]
391 ^ s_box[2][(r0 >> 16) & 0xFF]
392 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
393 r3 = (t1 + t0 + keys[4*i+9]) ^ rol1(r3);
394 r2 = (t0 + keys[4*i+8]) ^ r2;
397 t1 = ( s_box[1][r3 & 0xFF]
398 ^ s_box[2][(r3 >> 8) & 0xFF]
399 ^ s_box[3][(r3 >> 16) & 0xFF]
400 ^ s_box[0][(r3 >> 24) & 0xFF]);
401 t0 = ( s_box[0][r2 & 0xFF]
402 ^ s_box[1][(r2 >> 8) & 0xFF]
403 ^ s_box[2][(r2 >> 16) & 0xFF]
404 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
405 r1 = (t1 + t0 + keys[4*i+11]) ^ rol1(r1);
406 r0 = (t0 + keys[4*i+10]) ^ r0;
410 words[0] = r2 ^ keys[4];
411 words[1] = r3 ^ keys[5];
412 words[2] = r0 ^ keys[6];
413 words[3] = r1 ^ keys[7];
415 for (i = 0; i<4; i++, ciphertext += 4)
416 LE_WRITE_UINT32(ciphertext, words[i]);
420 /* Decrypt blocks of 16 bytes of data with the twofish algorithm.
422 * Before this function can be used, twofish_set_key() must be used in order to
423 * set up various tables required for the decryption algorithm.
425 * This function always decrypts 16 bytes of ciphertext to 16 bytes of
426 * plaintext. The memory areas of the plaintext and the ciphertext can
431 twofish_decrypt(const struct twofish_ctx *context,
434 const uint8_t *ciphertext)
437 const uint32_t *keys = context->keys;
438 const uint32_t (*s_box)[256] = context->s_box;
440 assert( !(length % TWOFISH_BLOCK_SIZE) );
441 for ( ; length; length -= TWOFISH_BLOCK_SIZE)
444 uint32_t r0, r1, r2, r3, t0, t1;
447 for (i = 0; i<4; i++, ciphertext += 4)
448 words[i] = LE_READ_UINT32(ciphertext);
450 r0 = words[2] ^ keys[6];
451 r1 = words[3] ^ keys[7];
452 r2 = words[0] ^ keys[4];
453 r3 = words[1] ^ keys[5];
455 for (i = 0; i < 8; i++) {
456 t1 = ( s_box[1][r3 & 0xFF]
457 ^ s_box[2][(r3 >> 8) & 0xFF]
458 ^ s_box[3][(r3 >> 16) & 0xFF]
459 ^ s_box[0][(r3 >> 24) & 0xFF]);
460 t0 = ( s_box[0][r2 & 0xFF]
461 ^ s_box[1][(r2 >> 8) & 0xFF]
462 ^ s_box[2][(r2 >> 16) & 0xFF]
463 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
464 r1 = (t1 + t0 + keys[39-4*i]) ^ r1;
466 r0 = (t0 + keys[38-4*i]) ^ rol1(r0);
468 t1 = ( s_box[1][r1 & 0xFF]
469 ^ s_box[2][(r1 >> 8) & 0xFF]
470 ^ s_box[3][(r1 >> 16) & 0xFF]
471 ^ s_box[0][(r1 >> 24) & 0xFF]);
472 t0 = ( s_box[0][r0 & 0xFF]
473 ^ s_box[1][(r0 >> 8) & 0xFF]
474 ^ s_box[2][(r0 >> 16) & 0xFF]
475 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
476 r3 = (t1 + t0 + keys[37-4*i]) ^ r3;
478 r2 = (t0 + keys[36-4*i]) ^ rol1(r2);
481 words[0] = r0 ^ keys[0];
482 words[1] = r1 ^ keys[1];
483 words[2] = r2 ^ keys[2];
484 words[3] = r3 ^ keys[3];
486 for (i = 0; i<4; i++, plaintext += 4)
487 LE_WRITE_UINT32(plaintext, words[i]);