/* Source: git.cryptolib.org Git - avr-crypto-lib.git / blob - cast5.c
 * Commit note: new, derived from old avr/crypto + cast5
 * Path: [avr-crypto-lib.git] / cast5.c */
/*
 * File:        cast5.c
 * Author:      Daniel Otte
 * Date:        26.07.2006
 * License:     GPL
 * Description: Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
 *
 */
9  //pgm_read_dword
10  
11  #include <stdint.h>
12  #include <string.h>
13  #include "cast5.h"
14  #include "config.h"
15  #include "uart.h"
16  #include "debug.h"
17  
18  #undef DEBUG
19  
20 #include "cast5-sbox.h"
21
22
23  
/*
 * Key-schedule S-box accessors: S5(x)..S8(x) yield the 32-bit entry with
 * index x of table s5..s8, fetched with pgm_read_dword().
 */
#define CAST5_SBOX_DWORD(table, idx) pgm_read_dword(&(table)[(idx)])

#define S5(x) CAST5_SBOX_DWORD(s5, (x))
#define S6(x) CAST5_SBOX_DWORD(s6, (x))
#define S7(x) CAST5_SBOX_DWORD(s7, (x))
#define S8(x) CAST5_SBOX_DWORD(s8, (x))
28
29  
/*
 * cast5_init_A - one 16-byte mixing step of the key schedule.
 *
 *   dest   16-byte output, written as four 32-bit words
 *   src    16-byte input, read both bytewise and as 32-bit words
 *   bmode  false: src indices are used as written (step "A" in cast5_init);
 *          true:  every src index is XOR-ed with 8, i.e. the two 8-byte
 *                 halves of src are swapped (step "B" in cast5_init)
 *
 * Each output word is one src word XOR-ed with five S-box entries. Words
 * 1..3 are indexed by bytes of dest written just before, so the four
 * stores must stay in this order. Both buffers are accessed through
 * uint32_t pointers.
 */
void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
	const uint8_t flip = bmode ? 0x8 : 0x0;
	uint32_t *out = (uint32_t*)dest;
	uint32_t acc;

	/* word 0: depends on src only */
	acc  = *((uint32_t*)(&src[0x0 ^ flip]));
	acc ^= S5(src[0xD ^ flip]);
	acc ^= S6(src[0xF ^ flip]);
	acc ^= S7(src[0xC ^ flip]);
	acc ^= S8(src[0xE ^ flip]);
	acc ^= S7(src[0x8 ^ flip]);
	out[0] = acc;

	/* word 1: indexed by the bytes of word 0 */
	acc  = *((uint32_t*)(&src[0x8 ^ flip]));
	acc ^= S5(dest[0x0]);
	acc ^= S6(dest[0x2]);
	acc ^= S7(dest[0x1]);
	acc ^= S8(dest[0x3]);
	acc ^= S8(src[0xA ^ flip]);
	out[1] = acc;

	/* word 2: indexed by the bytes of word 1 */
	acc  = *((uint32_t*)(&src[0xC ^ flip]));
	acc ^= S5(dest[0x7]);
	acc ^= S6(dest[0x6]);
	acc ^= S7(dest[0x5]);
	acc ^= S8(dest[0x4]);
	acc ^= S5(src[0x9 ^ flip]);
	out[2] = acc;

	/* word 3: indexed by the bytes of word 2 */
	acc  = *((uint32_t*)(&src[0x4 ^ flip]));
	acc ^= S5(dest[0xA]);
	acc ^= S6(dest[0x9]);
	acc ^= S7(dest[0xB]);
	acc ^= S8(dest[0x8]);
	acc ^= S6(src[0xB ^ flip]);
	out[3] = acc;
}
37
/*
 * Byte selectors used by cast5_init_M() and cast5_init_rM(). They rely on
 * locals named src, nmode, xmode, nmt and xmt being in scope at the point
 * of use.
 *   NMT(i): src[i], or src[nmt[i]] when nmode is set.
 *   XMT(j): src[xmt[row][j]] with row = 2*xmode + nmode.
 */
#define NMT(x) (src[nmode?nmt[(x)]:(x)])
#define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])

/*
 * cast5_init_M - derive four 32-bit key words from a 16-byte schedule state.
 *
 *   dest   receives four 32-bit words (16 bytes), written via uint32_t*
 *   src    16-byte schedule state, read bytewise
 *   nmode  selects the remapped byte order given by nmt[]
 *   xmode  together with nmode selects the xmt[] row supplying the fifth
 *          S-box index of each word
 *
 * Each word is the XOR of five S-box entries (S5, S6, S7, S8 and one extra
 * lookup chosen through XMT).
 */
void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
	uint32_t *word = (uint32_t*)dest;
	uint32_t acc;

	acc  = S5(NMT(0x8));
	acc ^= S6(NMT(0x9));
	acc ^= S7(NMT(0x7));
	acc ^= S8(NMT(0x6));
	acc ^= S5(XMT(0));
	word[0] = acc;

	acc  = S5(NMT(0xA));
	acc ^= S6(NMT(0xB));
	acc ^= S7(NMT(0x5));
	acc ^= S8(NMT(0x4));
	acc ^= S6(XMT(1));
	word[1] = acc;

	acc  = S5(NMT(0xC));
	acc ^= S6(NMT(0xD));
	acc ^= S7(NMT(0x3));
	acc ^= S8(NMT(0x2));
	acc ^= S7(XMT(2));
	word[2] = acc;

	acc  = S5(NMT(0xE));
	acc ^= S6(NMT(0xF));
	acc ^= S7(NMT(0x1));
	acc ^= S8(NMT(0x0));
	acc ^= S8(XMT(3));
	word[3] = acc;
}
48
/*
 * Single-byte S-box accessors: S5B(x)..S8B(x) fetch, with pgm_read_byte(),
 * the byte at offset 3 inside the 32-bit entry x of table s5..s8.
 * NOTE(review): on a little-endian target this is the most significant
 * byte of the stored entry - confirm against the table layout.
 */
#define CAST5_SBOX_BYTE3(table, idx) pgm_read_byte(3+(uint8_t*)(&(table)[(idx)]))

#define S5B(x) CAST5_SBOX_BYTE3(s5, (x))
#define S6B(x) CAST5_SBOX_BYTE3(s6, (x))
#define S7B(x) CAST5_SBOX_BYTE3(s7, (x))
#define S8B(x) CAST5_SBOX_BYTE3(s8, (x))
53
/*
 * cast5_init_rM - derive four 5-bit rotation keys and pack them.
 *
 *   klo     packed low nibbles; bytes klo[2*offset] and klo[2*offset+1] are
 *           OR-ed with two nibbles each (first key in the low nibble)
 *   khi     packed fifth bits; khi[offset>>1] is OR-ed with four bits, in
 *           the low nibble for even offset and the high nibble for odd
 *   offset  index of the group of four keys being generated
 *   src     16-byte schedule state, read bytewise
 *   nmode, xmode  byte-selection modes, see NMT()/XMT()
 *
 * Only ORs into klo/khi, so the caller must zero them beforehand.
 * Each key is the XOR of five S-box bytes; bits 0..3 go to klo, bit 4 to khi.
 */
void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
	uint8_t *lo = &klo[offset*2];
	uint8_t v;
	uint8_t hi = 0; /* bit k = bit 4 of the k-th key of this group */

	/* key 0 -> low nibble of lo[0], hi bit 0 */
	v = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
	lo[0] |= v & 0x0f;
	hi |= (v >> 4) & 0x01;

	/* key 1 -> high nibble of lo[0], hi bit 1 */
	v = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
	lo[0] |= (v << 4) & 0xf0;
	hi |= (v >> 3) & 0x02;

	/* key 2 -> low nibble of lo[1], hi bit 2 */
	v = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
	lo[1] |= v & 0x0f;
	hi |= (v >> 2) & 0x04;

	/* key 3 -> high nibble of lo[1], hi bit 3 */
	v = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
	lo[1] |= (v << 4) & 0xf0;
	hi |= (v >> 1) & 0x08;

#ifdef DEBUG
	uart_putstr("\r\n\t h="); uart_hexdump(&hi,1);
#endif
	khi[offset>>1] |= hi << ((offset & 0x1) ? 4 : 0);
}
75
/*
 * Indirect S-box accessors: S_nX(s) / S_nZ(s) fetch the 32-bit entry of
 * table sn whose index is byte s of the byte view BPX / BPZ. BPX and BPZ
 * are macros defined at the point of use.
 */
#define CAST5_SBOX_VIA(table, bytes, s) pgm_read_dword(&(table)[(bytes)[(s)]])

#define S_5X(s) CAST5_SBOX_VIA(s5, BPX, (s))
#define S_6X(s) CAST5_SBOX_VIA(s6, BPX, (s))
#define S_7X(s) CAST5_SBOX_VIA(s7, BPX, (s))
#define S_8X(s) CAST5_SBOX_VIA(s8, BPX, (s))

#define S_5Z(s) CAST5_SBOX_VIA(s5, BPZ, (s))
#define S_6Z(s) CAST5_SBOX_VIA(s6, BPZ, (s))
#define S_7Z(s) CAST5_SBOX_VIA(s7, BPZ, (s))
#define S_8Z(s) CAST5_SBOX_VIA(s8, BPZ, (s))
85
86
87
88
89 void cast5_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
90          /* we migth return if the key is valid and if setup was sucessfull */
91         uint32_t x[4], z[4];
92         #define BPX ((uint8_t*)&(x[0]))
93         #define BPZ ((uint8_t*)&(z[0]))
94         s->shortkey = (keylength<=80);
95         /* littel endian only! */
96         memset(&(x[0]), 0 ,16); /* set x to zero */
97         memcpy(&(x[0]), key, keylength/8);
98         
99
100         /* todo: merge a and b and compress the whole stuff */
101         /***** A *****/
102         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);      
103         /***** M *****/
104         cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
105         /***** B *****/
106         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
107         /***** N *****/
108         cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
109         /***** A *****/
110         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
111         /***** N' *****/
112         cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
113         /***** B *****/
114         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
115         /***** M' *****/
116         cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
117         
118         /* that were the masking keys, now the rotation keys */
119         /* set the keys to zero */
120         memset(&(s->rotl[0]),0,8);
121         s->roth[0]=s->roth[1]=0;
122         /***** A *****/
123         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
124         /***** M *****/
125         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
126         /***** B *****/
127         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
128         /***** N *****/
129         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
130         /***** A *****/
131         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
132         /***** N' *****/
133         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
134         /***** B *****/
135         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
136         /***** M' *****/
137         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
138         /* done ;-) */
139 }
140
141
142
143 /********************************************************************************************************/
144
/*
 * ROTL32(a,n): rotate the 32-bit value a left by n bits. Both shift counts
 * are reduced modulo 32 so that n == 0 does not shift by 32 (undefined
 * behaviour in C); a must have an unsigned 32-bit type.
 *
 * CHANGE_ENDIAN32(x): reverse the byte order of the 32-bit value x.
 *
 * Both expansions are fully parenthesised. Without the outer parentheses
 * an expression such as `l ^ CHANGE_ENDIAN32(y)` would parse as
 * `(l ^ y<<24) | ...` because ^ binds tighter than |.
 * Both macros evaluate their arguments more than once.
 */
#define ROTL32(a,n) (((a)<<((n)&31)) | ((a)>>((32-((n)&31))&31)))
#define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8))

/* signature shared by the three round functions cast5_f1..cast5_f3 */
typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);

/*
 * Byte offsets, within the in-memory 32-bit intermediate value, of the four
 * S-box indices used by the round functions (IA -> s1 ... ID -> s4).
 */
#define IA 3
#define IB 2
#define IC 1
#define ID 0
154
155
156 uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
157         uint32_t t;
158         t = ROTL32((d + m),r);
159 #ifdef DEBUG
160         uint32_t ia,ib,ic,id;
161         uart_putstr("\r\n f1("); uart_hexdump(&d, 4); uart_putc(',');
162                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
163                 uart_hexdump(&t, 4);
164         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
165         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
166         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
167         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
168         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
169         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
170         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
171         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
172
173         return (((ia ^ ib) - ic) + id);
174
175 #else
176         
177         return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
178                 - pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
179
180 #endif
181 }
182
183
184 uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
185         uint32_t t;
186         t = ROTL32((d ^ m),r);
187 #ifdef DEBUG
188         uint32_t ia,ib,ic,id;
189         uart_putstr("\r\n f2("); uart_hexdump(&d, 4); uart_putc(',');
190                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
191                 uart_hexdump(&t, 4);
192
193         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
194         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
195         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
196         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
197         
198         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
199         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
200         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
201         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
202
203         return (((ia - ib) + ic) ^ id);
204 #else
205         
206         return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) - pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
207                 + pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
208
209 #endif
210 }
211
212 uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
213         uint32_t t;
214         t = ROTL32((m - d),r);
215
216 #ifdef DEBUG
217         uint32_t ia,ib,ic,id;
218
219         uart_putstr("\r\n f3("); uart_hexdump(&d, 4); uart_putc(',');
220                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
221                 uart_hexdump(&t, 4);
222
223         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
224         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
225         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
226         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
227         
228         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
229         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
230         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
231         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
232         return (((ia + ib) ^ ic) - id);
233 #else
234         return ((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) + pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
235                 ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) - pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
236
237 #endif
238 }
239
240
241
242 void cast5_enc(cast5_ctx_t *s, void* block){
243         uint32_t l,r, x, y;
244         uint8_t i;
245         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
246         l=((uint32_t*)block)[0];
247         r=((uint32_t*)block)[1];
248 //      uart_putstr("\r\n round[-1] = ");
249 //      uart_hexdump(&r, 4);
250         for (i=0;i<(s->shortkey?12:16);++i){
251                 x = r;
252                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
253                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
254                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
255                 r = l ^ CHANGE_ENDIAN32(y);
256 //              uart_putstr("\r\n round["); DEBUG_B(i); uart_putstr("] = ");
257 //              uart_hexdump(&r, 4);
258                 l = x;
259         }
260         ((uint32_t*)block)[0]=r;
261         ((uint32_t*)block)[1]=l;
262 }
263
264
265 void cast5_dec(cast5_ctx_t *s, void* block){
266         uint32_t l,r, x, y;
267         int8_t i, rounds;
268         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
269         l=((uint32_t*)block)[0];
270         r=((uint32_t*)block)[1];
271         rounds = (s->shortkey?12:16);
272         for (i=rounds-1; i>=0 ;--i){
273                 x = r;
274                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
275                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
276                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
277                 r = l ^ CHANGE_ENDIAN32(y);
278                 l = x;
279         }
280         ((uint32_t*)block)[0]=r;
281         ((uint32_t*)block)[1]=l;
282 }
283
284
285 /*********************************************************************************************************/
286 /*********************************************************************************************************/
287 /*********************************************************************************************************/
288
289 #if 0
290
/*
 * cast5_old_init - fully unrolled variant of the key setup, compiled out by
 * the surrounding #if 0.
 *
 *   s          context to fill (shortkey, mask[0..15], rotl[0..7], roth[0..1])
 *   key        key bytes; keylength/8 of them are copied into x
 *   keylength  key length in bits; s->shortkey is set when it is <= 80
 *
 * x and z are two 16-byte work states that are alternately rewritten by the
 * "A" (x -> z) and "B" (z -> x) steps. After each step four S-box
 * combinations are formed: in the first pass they are stored as masking
 * keys, in the second pass only byte 3 of each is kept (t >>= 24), its low
 * nibble packed into s->rotl and its bit 4 into s->roth.
 * NOTE(review): the memcpy is not bounded by the 16-byte size of x.
 */
291 void cast5_old_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
292          /* we might return whether the key is valid and the setup was successful */
293         uint32_t x[4], z[4], t;
294         #define BPX ((uint8_t*)&(x[0]))
295         #define BPZ ((uint8_t*)&(z[0]))
296         s->shortkey = (keylength<=80);
297         /* little endian only! */
298         memset(&(x[0]), 0 ,16); /* set x to zero */
299         memcpy(&(x[0]), key, keylength/8);
300         
301
302         /* todo: merge a and b and compress the whole stuff */
303         /***** A *****/
304         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);        
305         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
306         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
307         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
308         /***** M *****/
309         s->mask[0] = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
310         s->mask[1] = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
311         s->mask[2] = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
312         s->mask[3] = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
313         /***** B *****/
314         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
315         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
316         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
317         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
318         /***** N *****/
319         s->mask[4] = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
320         s->mask[5] = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
321         s->mask[6] = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
322         s->mask[7] = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
323         /***** A *****/
324         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
325         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
326         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
327         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
328         /***** N' *****/
329         s->mask[8] = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
330         s->mask[9] = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
331         s->mask[10] = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
332         s->mask[11] = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
333         /***** B *****/
334         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
335         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
336         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
337         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
338         /***** M' *****/
339         s->mask[12] = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
340         s->mask[13] = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
341         s->mask[14] = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
342         s->mask[15] = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
343
344         /* those were the masking keys, now the rotation keys */
345         /* set the keys to zero */
346         memset(&(s->rotl[0]),0,8);
347         s->roth[0]=s->roth[1]=0;
348         /***** A *****/
349         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
350         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
351         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
352         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
353         /***** M *****/
            /* per key: low nibble of t -> one nibble of rotl, bit 4 of t -> one bit of roth */
354         t = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
355         t >>= 24;
356         s->rotl[0] |= t & 0x0f;         
357         s->roth[0] |= (t >> 4) & (1<<0);
358         t = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
359         t >>= 24;
360         s->rotl[0] |= (t<<4) & 0xf0;
361         s->roth[0] |= (t >> 3) & (1<<1);
362         t = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
363         t >>= 24;
364         s->rotl[1] |= t & 0x0f;         
365         s->roth[0] |= (t >> 2) & (1<<2);
366         t = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
367         t >>= 24;
368         s->rotl[1] |= (t<<4) & 0xf0;
369         s->roth[0] |= (t >> 1) & (1<<3);
370         /***** B *****/
371         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
372         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
373         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
374         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
375         /***** N *****/
376         t = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
377         t >>= 24;
378         s->rotl[2] |= t & 0x0f;         
379         s->roth[0] |= t & (1<<4);
380         t = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
381         t >>= 24;
382         s->rotl[2] |= (t<<4) & 0xf0;            
383         s->roth[0] |= (t<<1) & (1<<5);
384         t = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
385         t >>= 24;
386         s->rotl[3] |= t & 0x0f;         
387         s->roth[0] |= (t<<2) & (1<<6);
388         t = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
389         t >>= 24;
390         s->rotl[3] |= (t<<4) & 0xf0;            
391         s->roth[0] |= (t<<3) & (1<<7);
392         /***** A *****/
393         z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
394         z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
395         z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
396         z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
397         /***** N' *****/
398         t = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
399         t >>= 24;
400         s->rotl[4] |= t & 0x0f;         
401         s->roth[1] |= (t>>4) & (1<<0);
402         t = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
403         t >>= 24;
404         s->rotl[4] |= (t<<4) & 0xf0;            
405         s->roth[1] |= (t>>3) & (1<<1);
406         t = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
407         t >>= 24;
408         s->rotl[5] |= t & 0x0f;         
409         s->roth[1] |= (t>>2) & (1<<2);
410         t = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
411         t >>= 24;
412         s->rotl[5] |= (t<<4) & 0xf0;            
413         s->roth[1] |= (t>>1) & (1<<3);
414         /***** B *****/
415         x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
416         x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
417         x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
418         x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
419         /***** M' *****/
420         t = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
421         t >>= 24;
422         s->rotl[6] |= t & 0x0f;         
423         s->roth[1] |= t & (1<<4);
424         t = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
425         t >>= 24;
426         s->rotl[6] |= (t<<4) & 0xf0;            
427         s->roth[1] |= (t<<1) & (1<<5);
428         t = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
429         t >>= 24;
430         s->rotl[7] |= t & 0x0f;         
431         s->roth[1] |= (t<<2) & (1<<6);
432         t = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
433         t >>= 24;
434         s->rotl[7] |= (t<<4) & 0xf0;            
435         s->roth[1] |= (t<<3) & (1<<7);
436         
437         /* done ;-) */
438 }
439
440 #endif
441
442
443
444