]> git.cryptolib.org Git - avr-crypto-lib.git/blob - cast5.c
bab5c065cb7c430c09b3a93b5d3f72156bc1816c
[avr-crypto-lib.git] / cast5.c
1 /* 
2  * \file        cast5.c
3  * \author      Daniel Otte
4  * \date        26.07.2006
5  * \par License:
6  *  GPL
7  * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
8  * 
9  */
10  
11  #include <stdint.h>
12  #include <string.h>
13  #include "cast5.h"
14  #include "config.h"
15  #include "debug.h"
16  
17  #undef DEBUG
18  
19  #ifdef DEBUG
20   #include "uart.h"
21  #endif
22  
23 #include "cast5-sbox.h"
24
25
26  
/*
 * Word lookups into the tables s5..s8 (declared in cast5-sbox.h).
 * Each macro fetches entry x of its table through pgm_read_dword.
 */
#define CAST5_KS_WORD(box, idx) pgm_read_dword(&(box)[(idx)])
#define S5(x) CAST5_KS_WORD(s5, (x))
#define S6(x) CAST5_KS_WORD(s6, (x))
#define S7(x) CAST5_KS_WORD(s7, (x))
#define S8(x) CAST5_KS_WORD(s8, (x))
31
32  
/**
 * \brief derives 16 bytes at dest from the 16 bytes at src (key schedule mixing step).
 *
 * Each of the four output words is one 32-bit word of src XORed with five
 * S-box lookups. The first output word uses only src bytes as indices; the
 * remaining three also index with bytes of dest that were written earlier in
 * this same call, so the four words must be produced in order.
 *
 * @param dest  Pointer to the 16 byte output buffer (written as four 32-bit words).
 * @param src   Pointer to the 16 byte input buffer.
 * @param bmode false selects the step labelled "A" in cast5_init, true the
 *              step labelled "B"; in B mode every src index is XORed with 0x8.
 */
void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
	const uint8_t flip = bmode ? 0x8 : 0x0;
	uint32_t *out = (uint32_t*)dest;
	uint32_t w;

	/* word 0: depends on src only */
	w  = *((uint32_t*)(&src[0x0^flip]));
	w ^= S5(src[0xD^flip]);
	w ^= S6(src[0xF^flip]);
	w ^= S7(src[0xC^flip]);
	w ^= S8(src[0xE^flip]);
	w ^= S7(src[0x8^flip]);
	out[0] = w;

	/* word 1: indexed by the bytes of word 0 just stored */
	w  = *((uint32_t*)(&src[0x8^flip]));
	w ^= S5(dest[0x0]);
	w ^= S6(dest[0x2]);
	w ^= S7(dest[0x1]);
	w ^= S8(dest[0x3]);
	w ^= S8(src[0xA^flip]);
	out[1] = w;

	/* word 2: indexed by the bytes of word 1 */
	w  = *((uint32_t*)(&src[0xC^flip]));
	w ^= S5(dest[0x7]);
	w ^= S6(dest[0x6]);
	w ^= S7(dest[0x5]);
	w ^= S8(dest[0x4]);
	w ^= S5(src[0x9^flip]);
	out[2] = w;

	/* word 3: indexed by the bytes of word 2 */
	w  = *((uint32_t*)(&src[0x4^flip]));
	w ^= S5(dest[0xA]);
	w ^= S6(dest[0x9]);
	w ^= S7(dest[0xB]);
	w ^= S8(dest[0x8]);
	w ^= S6(src[0xB^flip]);
	out[3] = w;
}
40
/**
 * \brief computes four 32-bit words at dest from the 16 bytes at src.
 *
 * Every output word is the XOR of five S-box lookups. The first four indices
 * of each word come from src either directly or permuted through nmt
 * (nmode); the fifth index is chosen from xmt by the (xmode, nmode) pair.
 *
 * @param dest  Pointer to the 16 byte output (four 32-bit words).
 * @param src   Pointer to the 16 byte input.
 * @param nmode if true, src byte indices are remapped through nmt.
 * @param xmode together with nmode selects the row of xmt used for the
 *              fifth lookup of each word.
 *
 * Note: the NMT and XMT helper macros defined in here stay defined for the
 * rest of the file and are used again by cast5_init_rM.
 */
void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
	#define NMT(x) (src[nmode?nmt[(x)]:(x)])
	#define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])
	uint32_t *out = (uint32_t*)dest;
	uint32_t k;

	k  = S5(NMT(0x8));
	k ^= S6(NMT(0x9));
	k ^= S7(NMT(0x7));
	k ^= S8(NMT(0x6));
	k ^= S5(XMT(0));
	out[0] = k;

	k  = S5(NMT(0xA));
	k ^= S6(NMT(0xB));
	k ^= S7(NMT(0x5));
	k ^= S8(NMT(0x4));
	k ^= S6(XMT(1));
	out[1] = k;

	k  = S5(NMT(0xC));
	k ^= S6(NMT(0xD));
	k ^= S7(NMT(0x3));
	k ^= S8(NMT(0x2));
	k ^= S7(XMT(2));
	out[2] = k;

	k  = S5(NMT(0xE));
	k ^= S6(NMT(0xF));
	k ^= S7(NMT(0x1));
	k ^= S8(NMT(0x0));
	k ^= S8(XMT(3));
	out[3] = k;
}
51
/*
 * Byte lookups into the tables s5..s8: each macro reads, through
 * pgm_read_byte, the single byte at offset 3 inside entry x of its table.
 */
#define CAST5_KS_BYTE3(box, idx) pgm_read_byte(3+(uint8_t*)(&(box)[(idx)]))
#define S5B(x) CAST5_KS_BYTE3(s5, (x))
#define S6B(x) CAST5_KS_BYTE3(s6, (x))
#define S7B(x) CAST5_KS_BYTE3(s7, (x))
#define S8B(x) CAST5_KS_BYTE3(s8, (x))
56
/**
 * \brief computes four 5-bit values from src and packs them into klo/khi.
 *
 * The lookups follow the same index pattern as cast5_init_M but only one byte
 * of each S-box entry is read (S5B..S8B). From every resulting byte the low
 * nibble is ORed into klo (two values per byte, the first in the low nibble)
 * and bit 4 is collected into a 4-bit group that is ORed into khi.
 * Both klo and khi are only ORed into, so the caller has to clear them first.
 *
 * Relies on the NMT/XMT macros defined inside cast5_init_M.
 *
 * @param klo    array receiving the low nibbles; bytes offset*2 and offset*2+1 are updated.
 * @param khi    array receiving the bit-4 flags; byte offset>>1 is updated,
 *               upper nibble for odd offset, lower nibble for even offset.
 * @param offset index (0..3 as used by cast5_init) of the group of four values.
 * @param src    Pointer to the 16 byte input.
 * @param nmode  if true, src byte indices are remapped through nmt.
 * @param xmode  together with nmode selects the row of xmt.
 */
void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
	uint8_t *lo = &klo[offset*2];
	uint8_t hibits = 0; /* bit n = bit 4 of the n-th value */
	uint8_t kr;

	kr = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
	lo[0] |= (kr & 0x0f);
	if (kr & 0x10) {
		hibits |= 0x01;
	}

	kr = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
	lo[0] |= (uint8_t)((kr & 0x0f) << 4);
	if (kr & 0x10) {
		hibits |= 0x02;
	}

	kr = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
	lo[1] |= (kr & 0x0f);
	if (kr & 0x10) {
		hibits |= 0x04;
	}

	kr = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
	lo[1] |= (uint8_t)((kr & 0x0f) << 4);
	if (kr & 0x10) {
		hibits |= 0x08;
	}

	#ifdef DEBUG
		uart_putstr("\r\n\t h="); uart_hexdump(&hibits,1);
	#endif

	if (offset & 0x1) {
		khi[offset>>1] |= (uint8_t)(hibits << 4);
	} else {
		khi[offset>>1] |= hibits;
	}
}
78
/*
 * Indirect word lookups: the table index is byte s of the byte view BPX
 * (resp. BPZ), which must be defined as a macro at the point of use.
 */
#define CAST5_KS_VIA(box, bytes, s) pgm_read_dword(&(box)[bytes[(s)]])

#define S_5X(s) CAST5_KS_VIA(s5, BPX, s)
#define S_6X(s) CAST5_KS_VIA(s6, BPX, s)
#define S_7X(s) CAST5_KS_VIA(s7, BPX, s)
#define S_8X(s) CAST5_KS_VIA(s8, BPX, s)

#define S_5Z(s) CAST5_KS_VIA(s5, BPZ, s)
#define S_6Z(s) CAST5_KS_VIA(s6, BPZ, s)
#define S_7Z(s) CAST5_KS_VIA(s7, BPZ, s)
#define S_8Z(s) CAST5_KS_VIA(s8, BPZ, s)
88
89
90
91 /**
92  * \brief sets up round keys (context) for cast5 en/decryption.
93  * @param s Pointer to cast5 context.
94  * @param key Pointer to binary key.
95  * @param keylength length of keydata in bits.
96  */
97 void cast5_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
98          /* we migth return if the key is valid and if setup was sucessfull */
99         uint32_t x[4], z[4];
100         #define BPX ((uint8_t*)&(x[0]))
101         #define BPZ ((uint8_t*)&(z[0]))
102         s->shortkey = (keylength<=80);
103         /* littel endian only! */
104         memset(&(x[0]), 0 ,16); /* set x to zero */
105         if(keylength > 128)
106                 keylength=128;
107         memcpy(&(x[0]), key, (keylength+7)/8);
108         
109
110         /* todo: merge a and b and compress the whole stuff */
111         /***** A *****/
112         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);      
113         /***** M *****/
114         cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
115         /***** B *****/
116         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
117         /***** N *****/
118         cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
119         /***** A *****/
120         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
121         /***** N' *****/
122         cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
123         /***** B *****/
124         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
125         /***** M' *****/
126         cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
127         
128         /* that were the masking keys, now the rotation keys */
129         /* set the keys to zero */
130         memset(&(s->rotl[0]),0,8);
131         s->roth[0]=s->roth[1]=0;
132         /***** A *****/
133         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
134         /***** M *****/
135         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
136         /***** B *****/
137         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
138         /***** N *****/
139         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
140         /***** A *****/
141         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
142         /***** N' *****/
143         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
144         /***** B *****/
145         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
146         /***** M' *****/
147         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
148         /* done ;-) */
149 }
150
151
152
153 /********************************************************************************************************/
154
/*
 * Rotate the 32-bit value a left by n bits (n is taken modulo 32).
 * Both shift counts are masked: the plain form (a)>>(32-(n)) shifts by the
 * full width of the type when n == 0, which is undefined behaviour in C,
 * and a rotation amount of 0 is a legal rotation key.
 * The arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define ROTL32(a,n) ((uint32_t)(a)<<((n)&31) | (uint32_t)(a)>>((32-(n))&31))

/*
 * Reverse the byte order of a 32-bit value. The operand is converted to
 * uint32_t first so narrower or signed arguments cannot lose bits or
 * sign-extend. The argument is evaluated more than once.
 */
#define CHANGE_ENDIAN32(x) ((uint32_t)(x)<<24 | (uint32_t)(x)>>24 | \
                            ((uint32_t)(x)&0xff00UL)<<8 | ((uint32_t)(x)&0xff0000UL)>>8 )

/* common signature of the three round functions: (data, masking key, rotation key) */
typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);

/*
 * Byte positions inside a 32-bit word in memory, used by the round
 * functions to pick the S-box indices (IA selects the index for s1,
 * IB for s2, IC for s3, ID for s4).
 */
#define IA 3
#define IB 2
#define IC 1
#define ID 0
164
165
166 uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
167         uint32_t t;
168         t = ROTL32((d + m),r);
169 #ifdef DEBUG
170         uint32_t ia,ib,ic,id;
171         uart_putstr("\r\n f1("); uart_hexdump(&d, 4); uart_putc(',');
172                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
173                 uart_hexdump(&t, 4);
174         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
175         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
176         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
177         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
178         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
179         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
180         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
181         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
182
183         return (((ia ^ ib) - ic) + id);
184
185 #else
186         
187         return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
188                 - pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
189
190 #endif
191 }
192
193
194 uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
195         uint32_t t;
196         t = ROTL32((d ^ m),r);
197 #ifdef DEBUG
198         uint32_t ia,ib,ic,id;
199         uart_putstr("\r\n f2("); uart_hexdump(&d, 4); uart_putc(',');
200                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
201                 uart_hexdump(&t, 4);
202
203         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
204         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
205         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
206         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
207         
208         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
209         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
210         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
211         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
212
213         return (((ia - ib) + ic) ^ id);
214 #else
215         
216         return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]]) 
217                 - pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) ) 
218                     + pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) ) 
219                     ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
220
221 #endif
222 }
223
224 uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
225         uint32_t t;
226         t = ROTL32((m - d),r);
227
228 #ifdef DEBUG
229         uint32_t ia,ib,ic,id;
230
231         uart_putstr("\r\n f3("); uart_hexdump(&d, 4); uart_putc(',');
232                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
233                 uart_hexdump(&t, 4);
234
235         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
236         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
237         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
238         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
239         
240         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
241         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
242         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
243         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
244         return (((ia + ib) ^ ic) - id);
245 #else
246         return ((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) + pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
247                 ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) - pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
248
249 #endif
250 }
251
252 /*************************************************************************/
253
254 /**
255  * \brief encrypts a datablock with cast5
256  * @param s Pointer to cast5 roundkeys (context)
257  * @param block Pointer to datablock
258  */
259 void cast5_enc(cast5_ctx_t *s, void* block){
260         uint32_t l,r, x, y;
261         uint8_t i;
262         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
263         l=((uint32_t*)block)[0];
264         r=((uint32_t*)block)[1];
265 //      uart_putstr("\r\n round[-1] = ");
266 //      uart_hexdump(&r, 4);
267         for (i=0;i<(s->shortkey?12:16);++i){
268                 x = r;
269                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
270                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
271                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
272                 r = l ^ CHANGE_ENDIAN32(y);
273 //              uart_putstr("\r\n round["); DEBUG_B(i); uart_putstr("] = ");
274 //              uart_hexdump(&r, 4);
275                 l = x;
276         }
277         ((uint32_t*)block)[0]=r;
278         ((uint32_t*)block)[1]=l;
279 }
280
281 /*************************************************************************/
282
283 /**
284  * \brief decrypts a datablock with cast5
285  * @param s Pointer to cast5 roundkeys (context)
286  * @param block Pointer to datablock
287  */
288 void cast5_dec(cast5_ctx_t *s, void* block){
289         uint32_t l,r, x, y;
290         int8_t i, rounds;
291         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
292         l=((uint32_t*)block)[0];
293         r=((uint32_t*)block)[1];
294         rounds = (s->shortkey?12:16);
295         for (i=rounds-1; i>=0 ;--i){
296                 x = r;
297                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
298                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
299                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
300                 r = l ^ CHANGE_ENDIAN32(y);
301                 l = x;
302         }
303         ((uint32_t*)block)[0]=r;
304         ((uint32_t*)block)[1]=l;
305 }
306
307
308 /*********************************************************************************************************/
309 /*********************************************************************************************************/
310 /*********************************************************************************************************/
311
312 #if 0
313
314 void cast5_old_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
315          /* we migth return if the key is valid and if setup was sucessfull */
316         uint32_t x[4], z[4], t;
317         #define BPX ((uint8_t*)&(x[0]))
318         #define BPZ ((uint8_t*)&(z[0]))
319         s->shortkey = (keylength<=80);
320         /* littel endian only! */
321         memset(&(x[0]), 0 ,16); /* set x to zero */
322         memcpy(&(x[0]), key, keylength/8);
323         
324
325         /* todo: merge a and b and compress the whole stuff */
326         /***** A *****/
327         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);        
328         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
329         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
330         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
331         /***** M *****/
332         s->mask[0] = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
333         s->mask[1] = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
334         s->mask[2] = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
335         s->mask[3] = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
336         /***** B *****/
337         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
338         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
339         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
340         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
341         /***** N *****/
342         s->mask[4] = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
343         s->mask[5] = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
344         s->mask[6] = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
345         s->mask[7] = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
346         /***** A *****/
347         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
348         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
349         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
350         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
351         /***** N' *****/
352         s->mask[8] = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
353         s->mask[9] = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
354         s->mask[10] = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
355         s->mask[11] = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
356         /***** B *****/
357         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
358         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
359         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
360         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
361         /***** M' *****/
362         s->mask[12] = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
363         s->mask[13] = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
364         s->mask[14] = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
365         s->mask[15] = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
366
367         /* that were the masking keys, now the rotation keys */
368         /* set the keys to zero */
369         memset(&(s->rotl[0]),0,8);
370         s->roth[0]=s->roth[1]=0;
371         /***** A *****/
372         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
373         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
374         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
375         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
376         /***** M *****/
377         t = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
378         t >>= 24;
379         s->rotl[0] |= t & 0x0f;         
380         s->roth[0] |= (t >> 4) & (1<<0);
381         t = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
382         t >>= 24;
383         s->rotl[0] |= (t<<4) & 0xf0;
384         s->roth[0] |= (t >> 3) & (1<<1);
385         t = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
386         t >>= 24;
387         s->rotl[1] |= t & 0x0f;         
388         s->roth[0] |= (t >> 2) & (1<<2);
389         t = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
390         t >>= 24;
391         s->rotl[1] |= (t<<4) & 0xf0;
392         s->roth[0] |= (t >> 1) & (1<<3);
393         /***** B *****/
394         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
395         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
396         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
397         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
398         /***** N *****/
399         t = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
400         t >>= 24;
401         s->rotl[2] |= t & 0x0f;         
402         s->roth[0] |= t & (1<<4);
403         t = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
404         t >>= 24;
405         s->rotl[2] |= (t<<4) & 0xf0;            
406         s->roth[0] |= (t<<1) & (1<<5);
407         t = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
408         t >>= 24;
409         s->rotl[3] |= t & 0x0f;         
410         s->roth[0] |= (t<<2) & (1<<6);
411         t = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
412         t >>= 24;
413         s->rotl[3] |= (t<<4) & 0xf0;            
414         s->roth[0] |= (t<<3) & (1<<7);
415         /***** A *****/
416         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
417         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
418         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
419         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
420         /***** N' *****/
421         t = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
422         t >>= 24;
423         s->rotl[4] |= t & 0x0f;         
424         s->roth[1] |= (t>>4) & (1<<0);
425         t = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
426         t >>= 24;
427         s->rotl[4] |= (t<<4) & 0xf0;            
428         s->roth[1] |= (t>>3) & (1<<1);
429         t = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
430         t >>= 24;
431         s->rotl[5] |= t & 0x0f;         
432         s->roth[1] |= (t>>2) & (1<<2);
433         t = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
434         t >>= 24;
435         s->rotl[5] |= (t<<4) & 0xf0;            
436         s->roth[1] |= (t>>1) & (1<<3);
437         /***** B *****/
438         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
439         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
440         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
441         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
442         /***** M' *****/
443         t = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
444         t >>= 24;
445         s->rotl[6] |= t & 0x0f;         
446         s->roth[1] |= t & (1<<4);
447         t = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
448         t >>= 24;
449         s->rotl[6] |= (t<<4) & 0xf0;            
450         s->roth[1] |= (t<<1) & (1<<5);
451         t = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
452         t >>= 24;
453         s->rotl[7] |= t & 0x0f;         
454         s->roth[1] |= (t<<2) & (1<<6);
455         t = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
456         t >>= 24;
457         s->rotl[7] |= (t<<4) & 0xf0;            
458         s->roth[1] |= (t<<3) & (1<<7);
459         
460         /* done ;-) */
461 }
462
463 #endif
464
465
466
467