testutils.c: Fix high bits of the mpz_urandomb used with mini-gmp.
[gd/nettle] / serpent-encrypt.c
1 /* serpent-encrypt.c
2
3    The serpent block cipher.
4
5    For more details on this algorithm, see the Serpent website at
6    http://www.cl.cam.ac.uk/~rja14/serpent.html
7
8    Copyright (C) 2011  Niels Möller
9    Copyright (C) 2010, 2011  Simon Josefsson
10    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
11
12    This file is part of GNU Nettle.
13
14    GNU Nettle is free software: you can redistribute it and/or
15    modify it under the terms of either:
16
17      * the GNU Lesser General Public License as published by the Free
18        Software Foundation; either version 3 of the License, or (at your
19        option) any later version.
20
21    or
22
23      * the GNU General Public License as published by the Free
24        Software Foundation; either version 2 of the License, or (at your
25        option) any later version.
26
27    or both in parallel, as here.
28
29    GNU Nettle is distributed in the hope that it will be useful,
30    but WITHOUT ANY WARRANTY; without even the implied warranty of
31    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
32    General Public License for more details.
33
34    You should have received copies of the GNU General Public License and
35    the GNU Lesser General Public License along with this program.  If
36    not, see http://www.gnu.org/licenses/.
37 */
38
39 /* This file is derived from cipher/serpent.c in Libgcrypt v1.4.6.
40    The adaption to Nettle was made by Simon Josefsson on 2010-12-07
41    with final touches on 2011-05-30.  Changes include replacing
42    libgcrypt with nettle in the license template, renaming
43    serpent_context to serpent_ctx, renaming u32 to uint32_t, removing
44    libgcrypt stubs and selftests, modifying entry function prototypes,
45    using FOR_BLOCKS to iterate through data in encrypt/decrypt, using
46    LE_READ_UINT32 and LE_WRITE_UINT32 to access data in
47    encrypt/decrypt, and running indent on the code. */
48
49 #if HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <assert.h>
54 #include <limits.h>
55
56 #include "serpent.h"
57
58 #include "macros.h"
59 #include "serpent-internal.h"
60
61 /* These are the S-Boxes of Serpent.  They are copied from Serpents
62    reference implementation (the optimized one, contained in
63    `floppy2') and are therefore:
64
65      Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
66
67   To quote the Serpent homepage
68   (http://www.cl.cam.ac.uk/~rja14/serpent.html):
69
70   "Serpent is now completely in the public domain, and we impose no
71    restrictions on its use.  This was announced on the 21st August at
72    the First AES Candidate Conference. The optimised implementations
73    in the submission package are now under the GNU PUBLIC LICENSE
74    (GPL), although some comments in the code still say otherwise. You
75    are welcome to use Serpent for any application."  */
76
77 /* S0:  3  8 15  1 10  6  5 11 14 13  4  2  7  0  9 12 */
78 /* Could easily let y0, y1 overlap with x0, x1, and possibly also x2 and y2 */
79 #define SBOX0(x0, x1, x2, x3, y0, y1, y2, y3)   \
80   do {                                                  \
81     y3  = x1 ^ x2;                                      \
82     y0  = x0 | x3;                                      \
83     y1  = x0 ^ x1;                                      \
84     y3 ^= y0;                                           \
85     y2  = x2 | y3;                                      \
86     x0 ^= x3;                                           \
87     y2 &= x3;                                           \
88     x3 ^= x2;                                           \
89     x2 |= x1;                                           \
90     y0  = y1 & x2;                                      \
91     y2 ^= y0;                                           \
92     y0 &= y2;                                           \
93     y0 ^= x2;                                           \
94     x1 &= x0;                                           \
95     y0 ^= x0;                                           \
96     y0  = ~ y0;                                         \
97     y1  = y0 ^ x1;                                      \
98     y1 ^= x3;                                           \
99   } while (0)
100
101 /* FIXME: Arrange for some overlap between inputs and outputs? */
102 /* S1: 15 12  2  7  9  0  5 10  1 11 14  8  6 13  3  4 */
103 /* Original single-assignment form:
104    
105      t01 = x0  | x3;
106      t02 = x2  ^ x3;
107      t03 =     ~ x1;
108      t04 = x0  ^ x2;
109      t05 = x0  | t03;
110      t06 = x3  & t04;
111      t07 = t01 & t02;
112      t08 = x1  | t06;
113      y2  = t02 ^ t05;
114      t10 = t07 ^ t08;
115      t11 = t01 ^ t10;
116      t12 = y2  ^ t11;
117      t13 = x1  & x3;
118      y3  =     ~ t10;
119      y1  = t13 ^ t12;
120      t16 = t10 | y1;
121      t17 = t05 & t16;
122      y0  = x2  ^ t17;
123 */
124 #define SBOX1(x0, x1, x2, x3, y0, y1, y2, y3)           \
125   do {                                                  \
126     y1  = x0 | x3;                                      \
127     y2  = x2 ^ x3;                                      \
128     y0  = ~ x1;                                         \
129     y3  = x0 ^ x2;                                      \
130     y0 |= x0;                                           \
131     y3 &= x3;                                           \
132     x0 = y1 & y2;                                       \
133     y3 |= x1;                                           \
134     y2 ^= y0;                                           \
135     y3 ^= x0;                                           \
136     x0  = y1 ^ y3;                                      \
137     x0 ^= y2;                                           \
138     y1  = x1 & x3;                                      \
139     y1 ^= x0;                                           \
140     x3  = y1 | y3;                                      \
141     y3  = ~ y3;                                         \
142     y0 &= x3;                                           \
143     y0 ^= x2;                                           \
144   } while (0)
145
146 /* FIXME: Arrange for some overlap between inputs and outputs? */
147 /* S2:  8  6  7  9  3 12 10 15 13  1 14  4  0 11  5  2 */
148 #define SBOX2(x0, x1, x2, x3, y0, y1, y2, y3)   \
149   do {                                                  \
150     y2  = x0 | x2;                                      \
151     y1  = x0 ^ x1;                                      \
152     y3  = x3 ^ y2;                                      \
153     y0  = y1 ^ y3;                                      \
154     x3 |= x0;                                           \
155     x2 ^= y0;                                           \
156     x0  = x1 ^ x2;                                      \
157     x2 |= x1;                                           \
158     x0 &= y2;                                           \
159     y3 ^= x2;                                           \
160     y1 |= y3;                                           \
161     y1 ^= x0;                                           \
162     y2  = y3 ^ y1;                                      \
163     y2 ^= x1;                                           \
164     y3  = ~ y3;                                         \
165     y2 ^= x3;                                           \
166   } while (0)
167
168 /* S3:  0 15 11  8 12  9  6  3 13  1  2  4 10  7  5 14 */
169 /* Original single-assignment form:
170    
171      t01 = x0  ^ x2;
172      t02 = x0  | x3;
173      t03 = x0  & x3;
174      t04 = t01 & t02;
175      t05 = x1  | t03;
176      t06 = x0  & x1;
177      t07 = x3  ^ t04;
178      t08 = x2  | t06;
179      t09 = x1  ^ t07;
180      t10 = x3  & t05;
181      t11 = t02 ^ t10;
182      y3  = t08 ^ t09;
183      t13 = x3  | y3;
184      t14 = x0  | t07;
185      t15 = x1  & t13;
186      y2  = t08 ^ t11;
187      y0  = t14 ^ t15;
188      y1  = t05 ^ t04;
189 */
190 #define SBOX3(x0, x1, x2, x3, y0, y1, y2, y3)   \
191   do {                                                  \
192     y1  = x0 ^ x2;                                      \
193     y0  = x0 | x3;                                      \
194     y3  = x0 & x3;                                      \
195     y1 &= y0;                                           \
196     y3 |= x1;                                           \
197     y2  = x0 & x1;                                      \
198     y2 |= x2;                                           \
199     x2  = x3 ^ y1;                                      \
200     y1 ^= y3;                                           \
201     x0 |= x2;                                           \
202     x2 ^= x1;                                           \
203     y3 &= x3;                                           \
204     y0 ^= y3;                                           \
205     y3  = y2 ^ x2;                                      \
206     y2 ^= y0;                                           \
207     x3 |= y3;                                           \
208     x1 &= x3;                                           \
209     y0  = x0 ^ x1;                                      \
210   } while (0)
211
212
213 /* S4:  1 15  8  3 12  0 11  6  2  5  4 10  9 14  7 13 */
214 /* Original single-assignment form:
215     t01 = x0  | x1;
216     t02 = x1  | x2;
217     t03 = x0  ^ t02;
218     t04 = x1  ^ x3;
219     t05 = x3  | t03;
220     t06 = x3  & t01;
221     y3  = t03 ^ t06;
222     t08 = y3  & t04;
223     t09 = t04 & t05;
224     t10 = x2  ^ t06;
225     t11 = x1  & x2;
226     t12 = t04 ^ t08;
227     t13 = t11 | t03;
228     t14 = t10 ^ t09;
229     t15 = x0  & t05;
230     t16 = t11 | t12;
231     y2  = t13 ^ t08;
232     y1  = t15 ^ t16;
233     y0  =     ~ t14;
234 */
235 #define SBOX4(x0, x1, x2, x3, y0, y1, y2, y3)   \
236   do {                                                  \
237     y3  = x0 | x1;                                      \
238     y2  = x1 | x2;                                      \
239     y2 ^= x0;                                           \
240     y3 &= x3;                                           \
241     y0  = x1 ^ x3;                                      \
242     x3 |= y2;                                           \
243     x0 &= x3;                                           \
244     x1 &= x2;                                           \
245     x2 ^= y3;                                           \
246     y3 ^= y2;                                           \
247     y2 |= x1;                                           \
248     y1  = y3 & y0;                                      \
249     y2 ^= y1;                                           \
250     y1 ^= y0;                                           \
251     y1 |= x1;                                           \
252     y1 ^= x0;                                           \
253     y0 &= x3;                                           \
254     y0 ^= x2;                                           \
255     y0  = ~y0;                                          \
256   } while (0)
257
258 /* S5: 15  5  2 11  4 10  9 12  0  3 14  8 13  6  7  1 */
259 /* Original single-assignment form:
260     t01 = x1  ^ x3;
261     t02 = x1  | x3;
262     t03 = x0  & t01;
263     t04 = x2  ^ t02;
264     t05 = t03 ^ t04;
265     y0  =     ~ t05;
266     t07 = x0  ^ t01;
267     t08 = x3  | y0;
268     t09 = x1  | t05;
269     t10 = x3  ^ t08;
270     t11 = x1  | t07;
271     t12 = t03 | y0;
272     t13 = t07 | t10;
273     t14 = t01 ^ t11;
274     y2  = t09 ^ t13;
275     y1  = t07 ^ t08;
276     y3  = t12 ^ t14;
277 */
278 #define SBOX5(x0, x1, x2, x3, y0, y1, y2, y3)   \
279   do {                                                  \
280     y0  = x1 | x3;                                      \
281     y0 ^= x2;                                           \
282     x2  = x1 ^ x3;                                      \
283     y2  = x0 ^ x2;                                      \
284     x0 &= x2;                                           \
285     y0 ^= x0;                                           \
286     y3  = x1 | y2;                                      \
287     x1 |= y0;                                           \
288     y0  = ~y0;                                          \
289     x0 |= y0;                                           \
290     y3 ^= x2;                                           \
291     y3 ^= x0;                                           \
292     y1  = x3 | y0;                                      \
293     x3 ^= y1;                                           \
294     y1 ^= y2;                                           \
295     y2 |= x3;                                           \
296     y2 ^= x1;                                           \
297   } while (0)
298
299 /* S6:  7  2 12  5  8  4  6 11 14  9  1 15 13  3 10  0 */
300 /* Original single-assignment form:
301     t01 = x0  & x3;
302     t02 = x1  ^ x2;
303     t03 = x0  ^ x3;
304     t04 = t01 ^ t02;
305     t05 = x1  | x2;
306     y1  =     ~ t04;
307     t07 = t03 & t05;
308     t08 = x1  & y1;
309     t09 = x0  | x2;
310     t10 = t07 ^ t08;
311     t11 = x1  | x3;
312     t12 = x2  ^ t11;
313     t13 = t09 ^ t10;
314     y2  =     ~ t13;
315     t15 = y1  & t03;
316     y3  = t12 ^ t07;
317     t17 = x0  ^ x1;
318     t18 = y2  ^ t15;
319     y0  = t17 ^ t18;
320 */
321 #define SBOX6(x0, x1, x2, x3, y0, y1, y2, y3)   \
322   do {                                                  \
323     y0  = x0 ^ x3;                                      \
324     y1  = x0 & x3;                                      \
325     y2  = x0 | x2;                                      \
326     x3 |= x1;                                           \
327     x3 ^= x2;                                           \
328     x0 ^= x1;                                           \
329     y3  = x1 | x2;                                      \
330     x2 ^= x1;                                           \
331     y3 &= y0;                                           \
332     y1 ^= x2;                                           \
333     y1  = ~y1;                                          \
334     y0 &= y1;                                           \
335     x1 &= y1;                                           \
336     x1 ^= y3;                                           \
337     y3 ^= x3;                                           \
338     y2 ^= x1;                                           \
339     y2  = ~y2;                                          \
340     y0 ^= y2;                                           \
341     y0 ^= x0;                                           \
342   } while (0)
343
344 /* S7:  1 13 15  0 14  8  2 11  7  4 12 10  9  3  5  6 */
345 /* Original single-assignment form:
346     t01 = x0  & x2;
347     t02 =     ~ x3;
348     t03 = x0  & t02;
349     t04 = x1  | t01;
350     t05 = x0  & x1;
351     t06 = x2  ^ t04;
352     y3  = t03 ^ t06;
353     t08 = x2  | y3;
354     t09 = x3  | t05;
355     t10 = x0  ^ t08;
356     t11 = t04 & y3;
357     y1  = t09 ^ t10;
358     t13 = x1  ^ y1;
359     t14 = t01 ^ y1;
360     t15 = x2  ^ t05;
361     t16 = t11 | t13;
362     t17 = t02 | t14;
363     y0  = t15 ^ t17;
364     y2  = x0  ^ t16;
365 */
366 /* It appears impossible to do this with only 8 registers. We
367    recompute t02, and t04 (if we have spare registers, hopefully the
368    compiler can recognize them as common subexpressions). */
369 #define SBOX7(x0, x1, x2, x3, y0, y1, y2, y3)   \
370   do {                                                  \
371     y0  = x0 & x2;                                      \
372     y3  = x1 | y0; /* t04 */                            \
373     y3 ^= x2;                                           \
374     y1  = ~x3;     /* t02 */                            \
375     y1 &= x0;                                           \
376     y3 ^= y1;                                           \
377     y1  = x2 | y3;                                      \
378     y1 ^= x0;                                           \
379     y2  = x0 & x1;                                      \
380     x2 ^= y2;                                           \
381     y2 |= x3;                                           \
382     y1 ^= y2;                                           \
383     y2  = x1 | y0; /* t04 */                            \
384     y2 &= y3;                                           \
385     x1 ^= y1;                                           \
386     y2 |= x1;                                           \
387     y2 ^= x0;                                           \
388     y0 ^= y1;                                           \
389     x3  = ~x3;     /* t02 */                            \
390     y0 |= x3;                                           \
391     y0 ^= x2;                                           \
392   } while (0)
393
394 /* In-place linear transformation.  */
395 #define LINEAR_TRANSFORMATION(x0,x1,x2,x3)               \
396   do {                                                   \
397     x0 = ROTL32 (13, x0);                    \
398     x2 = ROTL32 (3, x2);                     \
399     x1 = x1 ^ x0 ^ x2;        \
400     x3 = x3 ^ x2 ^ (x0 << 3); \
401     x1 = ROTL32 (1, x1);                     \
402     x3 = ROTL32 (7, x3);                     \
403     x0 = x0 ^ x1 ^ x3;        \
404     x2 = x2 ^ x3 ^ (x1 << 7); \
405     x0 = ROTL32 (5, x0);                     \
406     x2 = ROTL32 (22, x2);                    \
407   } while (0)
408
409 /* Round inputs are x0,x1,x2,x3 (destroyed), and round outputs are
410    y0,y1,y2,y3. */
411 #define ROUND(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
412   do {                                                 \
413     KEYXOR(x0,x1,x2,x3, subkey);                       \
414     SBOX##which(x0,x1,x2,x3, y0,y1,y2,y3);             \
415     LINEAR_TRANSFORMATION(y0,y1,y2,y3);                \
416   } while (0)
417
418 #if HAVE_NATIVE_64_BIT
419
420 #define LINEAR_TRANSFORMATION64(x0,x1,x2,x3)             \
421   do {                                                   \
422     x0 = DROTL32 (13, x0);                    \
423     x2 = DROTL32 (3, x2);                     \
424     x1 = x1 ^ x0 ^ x2;        \
425     x3 = x3 ^ x2 ^ DRSHIFT32(3, x0);        \
426     x1 = DROTL32 (1, x1);                     \
427     x3 = DROTL32 (7, x3);                     \
428     x0 = x0 ^ x1 ^ x3;        \
429     x2 = x2 ^ x3 ^ DRSHIFT32(7, x1);        \
430     x0 = DROTL32 (5, x0);                     \
431     x2 = DROTL32 (22, x2);                    \
432   } while (0)
433
434 #define ROUND64(which, subkey, x0,x1,x2,x3, y0,y1,y2,y3) \
435   do {                                                 \
436     KEYXOR64(x0,x1,x2,x3, subkey);                     \
437     SBOX##which(x0,x1,x2,x3, y0,y1,y2,y3);             \
438     LINEAR_TRANSFORMATION64(y0,y1,y2,y3);                      \
439   } while (0)
440
441 #endif /* HAVE_NATIVE_64_BIT */
442
443 void
444 serpent_encrypt (const struct serpent_ctx *ctx,
445                  size_t length, uint8_t * dst, const uint8_t * src)
446 {
447   assert( !(length % SERPENT_BLOCK_SIZE));
448   
449 #if HAVE_NATIVE_64_BIT
450   if (length & SERPENT_BLOCK_SIZE)
451 #else
452   while (length >= SERPENT_BLOCK_SIZE)
453 #endif
454     {
455       uint32_t x0,x1,x2,x3, y0,y1,y2,y3;
456       unsigned k;
457
458       x0 = LE_READ_UINT32 (src);
459       x1 = LE_READ_UINT32 (src + 4);
460       x2 = LE_READ_UINT32 (src + 8);
461       x3 = LE_READ_UINT32 (src + 12);
462
463       for (k = 0; ; k += 8)
464         {
465           ROUND (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
466           ROUND (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
467           ROUND (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
468           ROUND (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
469           ROUND (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
470           ROUND (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
471           ROUND (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
472           if (k == 24)
473             break;
474           ROUND (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
475         }
476
477       /* Special final round, using two subkeys. */
478       KEYXOR (y0,y1,y2,y3, ctx->keys[31]);
479       SBOX7 (y0,y1,y2,y3, x0,x1,x2,x3);
480       KEYXOR (x0,x1,x2,x3, ctx->keys[32]);
481     
482       LE_WRITE_UINT32 (dst, x0);
483       LE_WRITE_UINT32 (dst + 4, x1);
484       LE_WRITE_UINT32 (dst + 8, x2);
485       LE_WRITE_UINT32 (dst + 12, x3);
486
487       src += SERPENT_BLOCK_SIZE;
488       dst += SERPENT_BLOCK_SIZE;
489       length -= SERPENT_BLOCK_SIZE;
490     }
491 #if HAVE_NATIVE_64_BIT
492   FOR_BLOCKS(length, dst, src, 2*SERPENT_BLOCK_SIZE)
493     {
494       uint64_t x0,x1,x2,x3, y0,y1,y2,y3;
495       unsigned k;
496
497       x0 = LE_READ_UINT32 (src);
498       x1 = LE_READ_UINT32 (src + 4);
499       x2 = LE_READ_UINT32 (src + 8);
500       x3 = LE_READ_UINT32 (src + 12);
501
502       x0 <<= 32; x0 |= LE_READ_UINT32 (src + 16);
503       x1 <<= 32; x1 |= LE_READ_UINT32 (src + 20);
504       x2 <<= 32; x2 |= LE_READ_UINT32 (src + 24);
505       x3 <<= 32; x3 |= LE_READ_UINT32 (src + 28);
506
507       for (k = 0; ; k += 8)
508         {
509           ROUND64 (0, ctx->keys[k+0], x0,x1,x2,x3, y0,y1,y2,y3);
510           ROUND64 (1, ctx->keys[k+1], y0,y1,y2,y3, x0,x1,x2,x3);
511           ROUND64 (2, ctx->keys[k+2], x0,x1,x2,x3, y0,y1,y2,y3);
512           ROUND64 (3, ctx->keys[k+3], y0,y1,y2,y3, x0,x1,x2,x3);
513           ROUND64 (4, ctx->keys[k+4], x0,x1,x2,x3, y0,y1,y2,y3);
514           ROUND64 (5, ctx->keys[k+5], y0,y1,y2,y3, x0,x1,x2,x3);
515           ROUND64 (6, ctx->keys[k+6], x0,x1,x2,x3, y0,y1,y2,y3);
516           if (k == 24)
517             break;
518           ROUND64 (7, ctx->keys[k+7], y0,y1,y2,y3, x0,x1,x2,x3);
519         }
520
521       /* Special final round, using two subkeys. */
522       KEYXOR64 (y0,y1,y2,y3, ctx->keys[31]);
523       SBOX7 (y0,y1,y2,y3, x0,x1,x2,x3);
524       KEYXOR64 (x0,x1,x2,x3, ctx->keys[32]);
525     
526       LE_WRITE_UINT32 (dst + 16, x0);
527       LE_WRITE_UINT32 (dst + 20, x1);
528       LE_WRITE_UINT32 (dst + 24, x2);
529       LE_WRITE_UINT32 (dst + 28, x3);
530       x0 >>= 32; LE_WRITE_UINT32 (dst, x0);
531       x1 >>= 32; LE_WRITE_UINT32 (dst + 4, x1);
532       x2 >>= 32; LE_WRITE_UINT32 (dst + 8, x2);
533       x3 >>= 32; LE_WRITE_UINT32 (dst + 12, x3);
534     }
535 #endif /* HAVE_NATIVE_64_BIT */
536 }