1 |
/* This is an independent implementation of the encryption algorithm: */ |
2 |
/* */ |
3 |
/* LOKI97 by Brown and Pieprzyk */ |
4 |
/* */ |
5 |
/* which is a candidate algorithm in the Advanced Encryption Standard */ |
6 |
/* programme of the US National Institute of Standards and Technology. */ |
7 |
/* */ |
8 |
/* Copyright in this implementation is held by Dr B R Gladman but I */ |
9 |
/* hereby give permission for its free direct or derivative use subject */ |
10 |
/* to acknowledgment of its origin and compliance with any conditions */ |
11 |
/* that the originators of the algorithm place on its exploitation. */ |
12 |
/* */ |
13 |
/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */ |
14 |
|
15 |
/* $Id: loki97.c,v 1.1.1.1 1999/10/15 22:49:23 nmav Exp $ */ |
16 |
|
17 |
/* modified for mcrypt */ |
18 |
|
19 |
/* Timing data for LOKI97 (loki.c) |
20 |
|
21 |
Core timing without I/O endian conversion: |
22 |
|
23 |
128 bit key: |
24 |
Key Setup: 7430 cycles |
25 |
Encrypt: 2134 cycles = 12.0 mbits/sec |
26 |
Decrypt: 2192 cycles = 11.7 mbits/sec |
27 |
Mean: 2163 cycles = 11.8 mbits/sec |
28 |
|
29 |
192 bit key: |
30 |
Key Setup: 7303 cycles |
31 |
Encrypt: 2138 cycles = 12.0 mbits/sec |
32 |
Decrypt: 2189 cycles = 11.7 mbits/sec |
33 |
Mean: 2164 cycles = 11.8 mbits/sec |
34 |
|
35 |
256 bit key: |
36 |
Key Setup: 7166 cycles |
37 |
Encrypt: 2131 cycles = 12.0 mbits/sec |
38 |
Decrypt: 2184 cycles = 11.7 mbits/sec |
39 |
Mean: 2158 cycles = 11.9 mbits/sec |
40 |
|
41 |
Full timing with I/O endian conversion: |
42 |
|
43 |
128 bit key: |
44 |
Key Setup: 7582 cycles |
45 |
Encrypt: 2174 cycles = 11.8 mbits/sec |
46 |
Decrypt: 2235 cycles = 11.5 mbits/sec |
47 |
Mean: 2205 cycles = 11.6 mbits/sec |
48 |
|
49 |
192 bit key: |
50 |
Key Setup: 7477 cycles |
51 |
Encrypt: 2167 cycles = 11.8 mbits/sec |
52 |
Decrypt: 2223 cycles = 11.5 mbits/sec |
53 |
Mean: 2195 cycles = 11.7 mbits/sec |
54 |
|
55 |
256 bit key: |
56 |
Key Setup: 7365 cycles |
57 |
Encrypt: 2177 cycles = 11.8 mbits/sec |
58 |
Decrypt: 2194 cycles = 11.7 mbits/sec |
59 |
Mean: 2186 cycles = 11.7 mbits/sec |
60 |
|
61 |
*/ |
62 |
|
63 |
|
64 |
#ifndef LIBDEFS_H |
65 |
# include <libdefs.h> |
66 |
# define LIBDEFS_H |
67 |
#endif |
68 |
#include <swap.h> |
69 |
|
70 |
/* Extract byte number n of x (n = 0 is the least significant byte) as a
 * word8.  Both arguments are parenthesised so that expressions such as
 * byte(v, i + 1) shift by 8 * (i + 1) rather than 8 * i + 1. */
#define byte(x,n)   ((word8)((x) >> (8 * (n))))
71 |
|
72 |
/* Geometry of S box 1 (table sb1). */
#define S1_SIZE     13                  /* width of an sb1 index, in bits      */
#define S1_LEN      (1 << S1_SIZE)      /* number of entries in sb1            */
#define S1_MASK     (S1_LEN - 1)        /* keeps the low S1_SIZE bits          */
#define S1_HMASK    (S1_MASK & ~0xff)   /* index bits above the low byte       */
#define S1_POLY     0x2911              /* modulus passed to ff_mult() for sb1 */

/* Geometry of S box 2 (table sb2). */
#define S2_SIZE     11                  /* width of an sb2 index, in bits      */
#define S2_LEN      (1 << S2_SIZE)      /* number of entries in sb2            */
#define S2_MASK     (S2_LEN - 1)        /* keeps the low S2_SIZE bits          */
#define S2_HMASK    (S2_MASK & ~0xff)   /* index bits above the low byte       */
#define S2_POLY     0x0aa7              /* modulus passed to ff_mult() for sb2 */
83 |
|
84 |
word32 delta[2] = { 0x7f4a7c15, 0x9e3779b9 }; |
85 |
|
86 |
word8 sb1[S1_LEN]; /* GF(2^11) S box */ |
87 |
word8 sb2[S2_LEN]; /* GF(2^11) S box */ |
88 |
word32 prm[256][2]; |
89 |
word32 init_done = 0; |
90 |
|
91 |
/* word32 l_key[96]; */ |
92 |
|
93 |
/*
 * Double-word arithmetic on values held as two word32 elements, with
 * element [0] the low word and element [1] the high word.
 *
 * add_eq(x, y): x += y.  The carry out of the low word is detected by the
 *               wrapped low sum being smaller than the addend.
 * sub_eq(x, y): x -= y.  The borrow is detected by the wrapped low
 *               difference being larger than the original low word.
 *
 * sub_eq assigns to a word32 variable named 'xs' that must be declared in
 * the calling scope.  It is wrapped in do { } while (0) so that it behaves
 * as a single statement (for example under an unbraced if).
 */
#define add_eq(x,y) ((x)[1] += (y)[1] + (((x)[0] += (y)[0]) < (y)[0] ? 1 : 0))
#define sub_eq(x,y) do { xs = (x)[0]; (x)[1] -= (y)[1] + (((x)[0] -= (y)[0]) > xs ? 1 : 0); } while (0)
95 |
|
96 |
/*
 * Multiply a by b as polynomials over GF(2), reducing modulo mpol.
 *
 *   a, b  - the two factors
 *   tpow  - bit position that triggers a reduction step: whenever bit
 *           'tpow' of the shifted multiplicand becomes set, mpol is
 *           XORed in
 *   mpol  - the modulus polynomial
 *
 * Returns the XOR of the (reduced) shifted copies of a selected by the set
 * bits of b.
 */
word32 ff_mult(word32 a, word32 b, word32 tpow, word32 mpol)
{
    word32 s = 0;
    /* form the mask in word32 rather than in signed int */
    const word32 m = (word32) 1 << tpow;

    while (b) {
        if (b & 1)
            s ^= a;

        b >>= 1;
        a <<= 1;

        if (a & m)
            a ^= mpol;
    }

    return s;
}
118 |
|
119 |
void init_tables(void) |
120 |
{ |
121 |
word32 i, j, v; |
122 |
|
123 |
/* initialise S box 1 */ |
124 |
|
125 |
for (i = 0; i < S1_LEN; ++i) { |
126 |
j = v = i ^ S1_MASK; |
127 |
v = ff_mult(v, j, S1_SIZE, S1_POLY); |
128 |
sb1[i] = (word8) ff_mult(v, j, S1_SIZE, S1_POLY); |
129 |
} |
130 |
/* initialise S box 2 */ |
131 |
|
132 |
for (i = 0; i < S2_LEN; ++i) { |
133 |
j = v = i ^ S2_MASK; |
134 |
v = ff_mult(v, j, S2_SIZE, S2_POLY); |
135 |
sb2[i] = (word8) ff_mult(v, j, S2_SIZE, S2_POLY); |
136 |
} |
137 |
|
138 |
/* initialise permutation table */ |
139 |
|
140 |
for (i = 0; i < 256; ++i) { |
141 |
prm[i][0] = |
142 |
((i & 1) << 7) | ((i & 2) << 14) | ((i & 4) << 21) | |
143 |
((i & 8) << 28); |
144 |
prm[i][1] = |
145 |
((i & 16) << 3) | ((i & 32) << 10) | ((i & 64) << 17) | |
146 |
((i & 128) << 24); |
147 |
} |
148 |
} |
149 |
|
150 |
/*
 * Round function: computes a two-word value from 'in' and 'key' and XORs
 * it into 'res' in place.
 *
 *   res  - two words, updated with res ^= F(in, key)
 *   in   - two input words (not modified)
 *   key  - key[0] drives the keyed bit swap of the input words; key[1]
 *          supplies the upper index bits for the second S-box layer
 *
 * Requires the tables sb1, sb2 and prm to have been filled by
 * init_tables().
 */
void f_fun(word32 res[2], const word32 in[2], const word32 key[2])
{
    word32 i, tt[2], pp[2];

    /* keyed swap: wherever a bit of key[0] is set, the two input words
     * exchange that bit */
    tt[0] = (in[0] & ~key[0]) | (in[1] & key[0]);
    tt[1] = (in[1] & ~key[0]) | (in[0] & key[0]);

    /* first S-box layer fused with the permutation: each 8-bit S-box
     * output is spread through prm[] and shifted to its own bit lane */
    i = sb1[((tt[1] >> 24) | (tt[0] << 8)) & S1_MASK];
    pp[0] = prm[i][0] >> 7;
    pp[1] = prm[i][1] >> 7;
    i = sb2[(tt[1] >> 16) & S2_MASK];
    pp[0] |= prm[i][0] >> 6;
    pp[1] |= prm[i][1] >> 6;
    i = sb1[(tt[1] >> 8) & S1_MASK];
    pp[0] |= prm[i][0] >> 5;
    pp[1] |= prm[i][1] >> 5;
    i = sb2[tt[1] & S2_MASK];
    pp[0] |= prm[i][0] >> 4;
    pp[1] |= prm[i][1] >> 4;
    i = sb2[((tt[0] >> 24) | (tt[1] << 8)) & S2_MASK];
    pp[0] |= prm[i][0] >> 3;
    pp[1] |= prm[i][1] >> 3;
    i = sb1[(tt[0] >> 16) & S1_MASK];
    pp[0] |= prm[i][0] >> 2;
    pp[1] |= prm[i][1] >> 2;
    i = sb2[(tt[0] >> 8) & S2_MASK];
    pp[0] |= prm[i][0] >> 1;
    pp[1] |= prm[i][1] >> 1;
    i = sb1[tt[0] & S1_MASK];
    pp[0] |= prm[i][0];
    pp[1] |= prm[i][1];

    /* second S-box layer: the low index byte comes from pp, the upper
     * index bits from key[1].  Each table value is widened to word32
     * before shifting, so that "<< 24" never shifts into the sign bit of
     * a promoted int. */
    res[0] ^= (word32) sb1[byte(pp[0], 0) | ((key[1] << 8) & S1_HMASK)]
        | ((word32) sb1[byte(pp[0], 1) | ((key[1] << 3) & S1_HMASK)] << 8)
        | ((word32) sb2[byte(pp[0], 2) | ((key[1] >> 2) & S2_HMASK)] << 16)
        | ((word32) sb2[byte(pp[0], 3) | ((key[1] >> 5) & S2_HMASK)] << 24);
    res[1] ^= (word32) sb1[byte(pp[1], 0) | ((key[1] >> 8) & S1_HMASK)]
        | ((word32) sb1[byte(pp[1], 1) | ((key[1] >> 13) & S1_HMASK)] << 8)
        | ((word32) sb2[byte(pp[1], 2) | ((key[1] >> 18) & S2_HMASK)] << 16)
        | ((word32) sb2[byte(pp[1], 3) | ((key[1] >> 21) & S2_HMASK)] << 24);
}
205 |
|
206 |
/* 256 bit version only */ |
207 |
/*
 * Expand a user key into the 96-word subkey array.
 *
 *   l_key   - output; words l_key[0] .. l_key[95] are written
 *   in_key  - input; words in_key[0] .. in_key[7] are always read
 *   key_len - not used by this function
 *
 * On the first call the lookup tables are built via init_tables().
 * NOTE(review): the init_done check is not synchronised; confirm whether
 * callers can reach this concurrently.
 */
void _mcrypt_loki97_set_key(word32 * l_key, const word32 in_key[],
                            const word32 key_len)
{
    word32 n, k1[2], k2[2], k3[2], k4[2], del[2], sum[2], old_k4[2];

    if (!init_done) {
        init_tables();
        init_done = 1;
    }

    /* load the four two-word key registers; within each pair the two
     * input words are exchanged */
#ifdef WORDS_BIGENDIAN
    k1[1] = byteswap(in_key[6]);
    k1[0] = byteswap(in_key[7]);
    k2[1] = byteswap(in_key[4]);
    k2[0] = byteswap(in_key[5]);
    k3[1] = byteswap(in_key[2]);
    k3[0] = byteswap(in_key[3]);
    k4[1] = byteswap(in_key[0]);
    k4[0] = byteswap(in_key[1]);
#else
    k1[1] = in_key[6];
    k1[0] = in_key[7];
    k2[1] = in_key[4];
    k2[0] = in_key[5];
    k3[1] = in_key[2];
    k3[0] = in_key[3];
    k4[1] = in_key[0];
    k4[0] = in_key[1];
#endif

    del[0] = delta[0];
    del[1] = delta[1];

    /* 48 subkeys of two words each */
    for (n = 0; n < 96; n += 2) {
        /* sum = k1 + k3 + del, then step del by delta */
        sum[0] = k1[0];
        sum[1] = k1[1];
        add_eq(sum, k3);
        add_eq(sum, del);
        add_eq(del, delta);

        /* rotate the registers: (k4, k3, k2, k1) <- (k3, k2, k1, k4) */
        old_k4[0] = k4[0];
        old_k4[1] = k4[1];
        k4[0] = k3[0];
        k4[1] = k3[1];
        k3[0] = k2[0];
        k3[1] = k2[1];
        k2[0] = k1[0];
        k2[1] = k1[1];
        k1[0] = old_k4[0];
        k1[1] = old_k4[1];

        /* k1 ^= F(sum, k3); the result is the next subkey */
        f_fun(k1, sum, k3);
        l_key[n] = k1[0];
        l_key[n + 1] = k1[1];
    }
}
266 |
|
267 |
/*
 * One encryption round using six subkey words starting at k:
 *   l += k[0..1];  r ^= F(l, k[2..3]);  l += k[4..5]
 * Wrapped in do { } while (0) so the three steps form a single statement.
 */
#define r_fun(l,r,k)                \
    do {                            \
        add_eq((l),(k));            \
        f_fun((r),(l),(k) + 2);     \
        add_eq((l), (k) + 4);       \
    } while (0)
271 |
|
272 |
/*
 * Encrypt one four-word block in place.
 *
 *   l_key - subkey array; words 0 .. 95 are read (six per round)
 *   _blk  - four words, overwritten with the result
 *
 * The block is loaded in reverse word order (byte-swapped when
 * WORDS_BIGENDIAN is defined), run through 16 rounds that alternate the
 * roles of the two halves, and stored back with the halves exchanged.
 */
void _mcrypt_loki97_encrypt(word32 * l_key, word32 * _blk)
{
    word32 blk[4];
    unsigned int off;

#ifdef WORDS_BIGENDIAN
    blk[0] = byteswap(_blk[3]);
    blk[1] = byteswap(_blk[2]);
    blk[2] = byteswap(_blk[1]);
    blk[3] = byteswap(_blk[0]);
#else
    blk[0] = _blk[3];
    blk[1] = _blk[2];
    blk[2] = _blk[1];
    blk[3] = _blk[0];
#endif

    /* 8 pairs of rounds, 12 subkey words per pair */
    for (off = 0; off < 96; off += 12) {
        r_fun(blk, blk + 2, l_key + off);
        r_fun(blk + 2, blk, l_key + off + 6);
    }

#ifdef WORDS_BIGENDIAN
    _blk[0] = byteswap(blk[1]);
    _blk[1] = byteswap(blk[0]);
    _blk[2] = byteswap(blk[3]);
    _blk[3] = byteswap(blk[2]);
#else
    _blk[0] = blk[1];
    _blk[1] = blk[0];
    _blk[2] = blk[3];
    _blk[3] = blk[2];
#endif
}
317 |
|
318 |
/*
 * One decryption round using six subkey words starting at k:
 *   l -= k[4..5];  r ^= F(l, k[2..3]);  l -= k[0..1]
 * Wrapped in do { } while (0) so the three steps form a single statement.
 */
#define ir_fun(l,r,k)               \
    do {                            \
        sub_eq((l),(k) + 4);        \
        f_fun((r),(l),(k) + 2);     \
        sub_eq((l),(k));            \
    } while (0)
322 |
|
323 |
/*
 * Decrypt one four-word block in place.
 *
 *   l_key - subkey array; words 0 .. 95 are read (six per round)
 *   _blk  - four words, overwritten with the result
 *
 * Same load/store layout as the encrypt routine; the 16 rounds use the
 * subkeys from the end of l_key back to the start.
 */
void _mcrypt_loki97_decrypt(word32 * l_key, word32 * _blk)
{
    word32 xs, blk[4];          /* xs is the scratch word sub_eq assigns to */
    int pair;

#ifdef WORDS_BIGENDIAN
    blk[0] = byteswap(_blk[3]);
    blk[1] = byteswap(_blk[2]);
    blk[2] = byteswap(_blk[1]);
    blk[3] = byteswap(_blk[0]);
#else
    blk[0] = _blk[3];
    blk[1] = _blk[2];
    blk[2] = _blk[1];
    blk[3] = _blk[0];
#endif

    /* 8 pairs of rounds, walking the subkeys from offset 90 down to 0 */
    for (pair = 7; pair >= 0; --pair) {
        ir_fun(blk, blk + 2, l_key + 12 * pair + 6);
        ir_fun(blk + 2, blk, l_key + 12 * pair);
    }

#ifdef WORDS_BIGENDIAN
    _blk[0] = byteswap(blk[1]);
    _blk[1] = byteswap(blk[0]);
    _blk[2] = byteswap(blk[3]);
    _blk[3] = byteswap(blk[2]);
#else
    _blk[0] = blk[1];
    _blk[1] = blk[0];
    _blk[2] = blk[3];
    _blk[3] = blk[2];
#endif
}