3 This file is part of the Crypto-avr-lib/microcrypt-lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 * \email daniel.otte@rub.de
26 * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
42 #include "cast5-sbox.h"
/* S5(x)..S8(x): fetch entry x of table s5..s8 with pgm_read_dword, i.e. one
 * dword read through the program-memory accessor.
 * NOTE(review): the tables s5..s8 are presumably declared in cast5-sbox.h
 * and placed in program memory -- confirm. */
46 #define S5(x) pgm_read_dword(&s5[(x)])
47 #define S6(x) pgm_read_dword(&s6[(x)])
48 #define S7(x) pgm_read_dword(&s7[(x)])
49 #define S8(x) pgm_read_dword(&s8[(x)])
/*
 * Derives a 16-byte key-schedule block `dest` from the 16-byte block `src`.
 *
 * Each statement stores one 32-bit word of dest: one 32-bit word of src
 * XORed with five table lookups (S5..S8), each indexed by a single byte.
 *
 * bmode selects the index mask applied to every src byte index:
 *   false -> mask 0x0, indices are used as written;
 *   true  -> mask 0x8, every src index is XORed with 8, so the two 8-byte
 *            halves of src are exchanged as seen by this function.
 * dest indices are never masked.
 *
 * The four statements are order-dependent: statements 2..4 index the tables
 * with dest bytes that were stored by the statement before them.
 *
 * NOTE(review): the uint32_t casts assume dest and src are suitably aligned
 * and do not overlap; the callers visible in this file pass &x[0] / &z[0]
 * -- confirm their declared type.
 */
52 void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
53 uint8_t mask = bmode?0x8:0;
/* word 0: lookups use src bytes only */
54 *((uint32_t*)(&dest[0x0])) = *((uint32_t*)(&src[0x0^mask])) ^ S5(src[0xD^mask]) ^ S6(src[0xF^mask]) ^ S7(src[0xC^mask]) ^ S8(src[0xE^mask]) ^ S7(src[0x8^mask]);
/* word 1: lookups use dest bytes 0..3 (just written) plus one src byte */
55 *((uint32_t*)(&dest[0x4])) = *((uint32_t*)(&src[0x8^mask])) ^ S5(dest[0x0]) ^ S6(dest[0x2]) ^ S7(dest[0x1]) ^ S8(dest[0x3]) ^ S8(src[0xA^mask]);
/* word 2: lookups use dest bytes 4..7 plus one src byte */
56 *((uint32_t*)(&dest[0x8])) = *((uint32_t*)(&src[0xC^mask])) ^ S5(dest[0x7]) ^ S6(dest[0x6]) ^ S7(dest[0x5]) ^ S8(dest[0x4]) ^ S5(src[0x9^mask]);
/* word 3: lookups use dest bytes 8..B plus one src byte */
57 *((uint32_t*)(&dest[0xC])) = *((uint32_t*)(&src[0x4^mask])) ^ S5(dest[0xA]) ^ S6(dest[0x9]) ^ S7(dest[0xB]) ^ S8(dest[0x8]) ^ S6(src[0xB^mask]);
/*
 * Writes four 32-bit words (16 bytes) to `dest`, each the XOR of five table
 * lookups (S5..S8) indexed by single bytes of the 16-byte block `src`.
 * cast5_init uses it to fill four consecutive entries of the masking-key
 * array per call.
 *
 * nmode: when true, the byte index of the first four lookups of every word
 *        is remapped through nmt[]; when false the index is used as written.
 * xmode: together with nmode selects the row (xmode<<1)+nmode of xmt[][],
 *        which supplies the src byte index of the fifth lookup of each word.
 */
60 void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
61 uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
/* xmt[row][k]: src byte index for the fifth lookup of word k */
62 uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
/* NMT/XMT expand to src bytes using the local names src, nmode, xmode, nmt
 * and xmt. No #undef is visible, and cast5_init_rM below uses the same
 * macros, so it appears to rely on these definitions -- confirm. */
63 #define NMT(x) (src[nmode?nmt[(x)]:(x)])
64 #define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])
65 *((uint32_t*)(&dest[0x0])) = S5(NMT(0x8)) ^ S6(NMT(0x9)) ^ S7(NMT(0x7)) ^ S8(NMT(0x6)) ^ S5(XMT(0));
66 *((uint32_t*)(&dest[0x4])) = S5(NMT(0xA)) ^ S6(NMT(0xB)) ^ S7(NMT(0x5)) ^ S8(NMT(0x4)) ^ S6(XMT(1));
67 *((uint32_t*)(&dest[0x8])) = S5(NMT(0xC)) ^ S6(NMT(0xD)) ^ S7(NMT(0x3)) ^ S8(NMT(0x2)) ^ S7(XMT(2));
68 *((uint32_t*)(&dest[0xC])) = S5(NMT(0xE)) ^ S6(NMT(0xF)) ^ S7(NMT(0x1)) ^ S8(NMT(0x0)) ^ S8(XMT(3));
/* S5B(x)..S8B(x): read only the single byte at byte offset 3 inside entry x
 * of table s5..s8 (pgm_read_byte instead of a full dword read). They are
 * used by cast5_init_rM, whose visible lines keep only the low bits of the
 * combined value. */
71 #define S5B(x) pgm_read_byte(3+(uint8_t*)(&s5[(x)]))
72 #define S6B(x) pgm_read_byte(3+(uint8_t*)(&s6[(x)]))
73 #define S7B(x) pgm_read_byte(3+(uint8_t*)(&s7[(x)]))
74 #define S8B(x) pgm_read_byte(3+(uint8_t*)(&s8[(x)]))
/*
 * Byte-wide counterpart of cast5_init_M for the rotation keys: computes four
 * one-byte values t from `src` (same index pattern as cast5_init_M, but via
 * the S5B..S8B byte lookups) and packs them into the rotation-key storage.
 *
 * klo:    nibble array; the low 4 bits of the four values go to
 *         klo[offset*2] (low nibble, high nibble) and klo[offset*2+1]
 *         (low nibble, high nibble), in that order.
 * khi:    receives `h` in the low nibble of khi[offset>>1] when offset is
 *         even and in the high nibble when offset is odd.
 * offset: index of the group of four keys being produced (cast5_init passes
 *         0..3).
 * src, nmode, xmode: as for cast5_init_M.
 *
 * All stores use |=, so klo and khi must be zeroed by the caller beforehand
 * (cast5_init clears rotl[] and roth[] before the first call).
 *
 * NOTE(review): the declarations of t and h and every statement that
 * assigns h are not visible in this excerpt; h presumably collects bit 4 of
 * each t -- confirm against the full source. The uart_* line presumably
 * belongs to a debug-only section whose preprocessor guards are also not
 * visible -- confirm.
 */
76 void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
77 uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
78 uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
/* NMT/XMT are the macros defined inside cast5_init_M; they pick up this
 * function's own src, nmode, xmode, nmt and xmt. */
80 t = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
81 klo[offset*2] |= (t & 0x0f);
83 t = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
84 klo[offset*2] |= (t<<4) & 0xf0;
86 t = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
87 klo[offset*2+1] |= t&0xf;
89 t = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
90 klo[offset*2+1] |= t<<4;
93 uart_putstr("\r\n\t h="); uart_hexdump(&h,1);
/* even offset -> low nibble of khi[offset>>1], odd offset -> high nibble */
95 khi[offset>>1] |= h<<((offset&0x1)?4:0);
/* S_nX(s) / S_nZ(s): fetch the entry of table s5..s8 selected by byte s of
 * BPX / BPZ. BPX and BPZ are not defined at file scope in the visible code;
 * they are #defined inside the init functions as byte pointers to local
 * arrays x and z, so these macros are only usable where such x / z are in
 * scope (cast5_old_init uses them). */
98 #define S_5X(s) pgm_read_dword(&s5[BPX[(s)]])
99 #define S_6X(s) pgm_read_dword(&s6[BPX[(s)]])
100 #define S_7X(s) pgm_read_dword(&s7[BPX[(s)]])
101 #define S_8X(s) pgm_read_dword(&s8[BPX[(s)]])
103 #define S_5Z(s) pgm_read_dword(&s5[BPZ[(s)]])
104 #define S_6Z(s) pgm_read_dword(&s6[BPZ[(s)]])
105 #define S_7Z(s) pgm_read_dword(&s7[BPZ[(s)]])
106 #define S_8Z(s) pgm_read_dword(&s8[BPZ[(s)]])
111 * \brief sets up round keys (context) for cast5 en/decryption.
112 * @param s Pointer to cast5 context.
113 * @param key Pointer to binary key.
114 * @param keylength length of keydata in bits.
/*
 * Key setup: fills the context *s (shortkey flag, mask[0..15], rotl[], roth[])
 * from `key`, whose length is given in bits by `keylength`.
 *
 * The key is copied into the zeroed 16-byte block x, then cast5_init_A is
 * applied alternately x->z (bmode false) and z->x (bmode true). After each
 * of the first four steps cast5_init_M emits four masking keys; after each
 * of the next four steps cast5_init_rM emits four rotation keys.
 *
 * NOTE(review): the declarations of x and z are not visible in this excerpt
 * (presumably uint32_t x[4], z[4] as in cast5_old_init -- confirm).
 * NOTE(review): no upper bound on keylength is visible here; a value above
 * 128 would make the memcpy write past the 16 bytes cleared by the memset
 * -- confirm that callers never pass more than 128.
 */
116 void cast5_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
117 /* we might return whether the key is valid and whether setup was successful */
/* BPX/BPZ: byte views of x and z; they are consumed by the S_nX / S_nZ
 * macros, which the visible lines of this function do not use. */
119 #define BPX ((uint8_t*)&(x[0]))
120 #define BPZ ((uint8_t*)&(z[0]))
/* keys of at most 80 bits select the reduced round count (12 instead of 16
 * in cast5_enc / cast5_dec) */
121 s->shortkey = (keylength<=80);
122 /* little endian only! */
123 memset(&(x[0]), 0 ,16); /* set x to zero */
/* copy the key, rounding a partial trailing byte up to a whole byte */
126 memcpy(&(x[0]), key, (keylength+7)/8);
129 /* TODO: merge A and B and compress the whole thing */
/* masking keys: mask[0..3], mask[4..7], mask[8..11], mask[12..15] */
131 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
133 cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
135 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
137 cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
139 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
141 cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
143 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
145 cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
147 /* those were the masking keys, now the rotation keys */
148 /* set the keys to zero */
/* required because cast5_init_rM only ORs bits into rotl / roth */
149 memset(&(s->rotl[0]),0,8);
150 s->roth[0]=s->roth[1]=0;
/* rotation keys: the same A/M alternation continues from the current x,
 * with offsets 0..3 selecting the group of four keys being written */
152 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
154 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
156 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
158 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
160 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
162 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
164 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
166 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
172 /********************************************************************************************************/
/*
 * Rotate the 32-bit unsigned value a left by n bits (n is taken modulo 32).
 *
 * Both shift counts are masked to 0..31. Without the mask, n == 0 would
 * evaluate a >> 32, and shifting a 32-bit value by its full width is
 * undefined behaviour in C. A rotation amount of 0 is reachable: the round
 * functions receive a 5-bit rotation key (0..31) from cast5_enc/cast5_dec.
 *
 * a is evaluated twice and must therefore be free of side effects.
 */
#define ROTL32(a,n) (((a) << ((n) & 31)) | ((a) >> ((32 - (n)) & 31)))

/*
 * Reverse the byte order of the 32-bit unsigned value x.
 * x is evaluated four times and must therefore be free of side effects.
 */
#define CHANGE_ENDIAN32(x) (((x) << 24) | ((x) >> 24) | (((x) & 0xff00) << 8) | (((x) & 0xff0000) >> 8))

/* Common signature of the round functions cast5_f1..cast5_f3:
 * (data word, masking key, rotation amount) -> 32-bit result. */
typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);
/*
 * Round function type 1.
 *   t      = (d + m) rotated left by r bits
 *   result = ((s1[a] ^ s2[b]) - s3[c]) + s4[d'],
 * where a, b, c, d' are the bytes of t at positions IA, IB, IC, ID.
 *
 * d: data word, m: masking key, r: rotation amount.
 *
 * NOTE(review): the declaration of t, the definitions of IA..ID and the
 * preprocessor lines between the two return statements are not visible in
 * this excerpt. The first return (with the uart_* dumps) is presumably the
 * debug build and the second the normal build -- confirm. Both compute the
 * same expression.
 */
185 uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
187 t = ROTL32((d + m),r);
189 uint32_t ia,ib,ic,id;
190 uart_putstr("\r\n f1("); uart_hexdump(&d, 4); uart_putc(',');
191 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
/* one table lookup per byte of t */
193 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
194 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
195 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
196 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
197 uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
198 uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
199 uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
200 uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
202 return (((ia ^ ib) - ic) + id);
/* same expression without the intermediate variables and dumps */
206 return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]] ))
207 - pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
/*
 * Round function type 2.
 *   t      = (d ^ m) rotated left by r bits
 *   result = ((s1[a] - s2[b]) + s3[c]) ^ s4[d'],
 * where a, b, c, d' are the bytes of t at positions IA, IB, IC, ID.
 *
 * d: data word, m: masking key, r: rotation amount.
 *
 * NOTE(review): the declaration of t, the definitions of IA..ID and the
 * preprocessor lines between the two return statements are not visible in
 * this excerpt. The first return (with the uart_* dumps) is presumably the
 * debug build and the second the normal build -- confirm. Both compute the
 * same expression.
 */
213 uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
215 t = ROTL32((d ^ m),r);
217 uint32_t ia,ib,ic,id;
218 uart_putstr("\r\n f2("); uart_hexdump(&d, 4); uart_putc(',');
219 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
/* one table lookup per byte of t */
222 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
223 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
224 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
225 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
227 uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
228 uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
229 uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
230 uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
232 return (((ia - ib) + ic) ^ id);
/* same expression without the intermediate variables and dumps */
235 return (((pgm_read_dword(&s1[((uint8_t*)&t)[IA]])
236 - pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) )
237 + pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) )
238 ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
/*
 * Round function type 3.
 *   t      = (m - d) rotated left by r bits
 *   result = ((s1[a] + s2[b]) ^ s3[c]) - s4[d'],
 * where a, b, c, d' are the bytes of t at positions IA, IB, IC, ID.
 *
 * d: data word, m: masking key, r: rotation amount.
 *
 * NOTE(review): the declaration of t, the definitions of IA..ID and the
 * preprocessor lines between the two return statements are not visible in
 * this excerpt. The first return (with the uart_* dumps) is presumably the
 * debug build and the second the normal build -- confirm. Both compute the
 * same expression.
 */
243 uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
245 t = ROTL32((m - d),r);
248 uint32_t ia,ib,ic,id;
250 uart_putstr("\r\n f3("); uart_hexdump(&d, 4); uart_putc(',');
251 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
/* one table lookup per byte of t */
254 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
255 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
256 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
257 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
259 uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
260 uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
261 uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
262 uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
263 return (((ia + ib) ^ ic) - id);
/* same expression without the intermediate variables and dumps */
265 return ((pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) + pgm_read_dword(&s2[((uint8_t*)&t)[IB]] ))
266 ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) - pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
271 /*************************************************************************/
274 * \brief encrypts a datablock with cast5
275 * @param s Pointer to cast5 roundkeys (context)
276 * @param block Pointer to datablock
/*
 * Encrypts the 8-byte block at `block` in place using the round keys in *s.
 *
 * The block is read as two 32-bit words l (word 0) and r (word 1). The loop
 * runs 12 rounds when s->shortkey is set and 16 otherwise; round i uses
 * round function f[i % 3], masking key s->mask[i] and a 5-bit rotation
 * amount. After the loop r is stored to word 0 and l to word 1.
 *
 * NOTE(review): the declarations of l, r, y, i and the statements that hand
 * the old r over to l inside the loop are not visible in this excerpt --
 * confirm against the full source.
 * NOTE(review): block is accessed through uint32_t*, so it is assumed to be
 * suitably aligned for 32-bit access on the target -- confirm.
 */
278 void cast5_enc(cast5_ctx_t *s, void* block){
/* round-function table, indexed by round number modulo 3 */
281 cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
282 l=((uint32_t*)block)[0];
283 r=((uint32_t*)block)[1];
284 // uart_putstr("\r\n round[-1] = ");
285 // uart_hexdump(&r, 4);
286 for (i=0;i<(s->shortkey?12:16);++i){
/* r and the masking key are byte-swapped before the call and the result is
 * swapped back afterwards.
 * Rotation amount: bit (i & 7) of roth[i>>3] contributes 0x10; the nibble
 * of rotl[i>>1] (high nibble for odd i, low nibble for even i) contributes
 * the low four bits. */
288 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
289 (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
290 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
291 r = l ^ CHANGE_ENDIAN32(y);
292 // uart_putstr("\r\n round["); DEBUG_B(i); uart_putstr("] = ");
293 // uart_hexdump(&r, 4);
/* the two halves are written back exchanged */
296 ((uint32_t*)block)[0]=r;
297 ((uint32_t*)block)[1]=l;
300 /*************************************************************************/
303 * \brief decrypts a datablock with cast5
304 * @param s Pointer to cast5 roundkeys (context)
305 * @param block Pointer to datablock
/*
 * Decrypts the 8-byte block at `block` in place using the round keys in *s.
 *
 * Same round computation as cast5_enc, but the rounds are applied in reverse
 * order: i runs from rounds-1 down to 0, with rounds = 12 when s->shortkey
 * is set and 16 otherwise. After the loop r is stored to word 0 and l to
 * word 1.
 *
 * NOTE(review): the declarations of l, r, y, i, rounds and the statements
 * that hand the old r over to l inside the loop are not visible in this
 * excerpt -- confirm against the full source.
 * NOTE(review): the loop condition i>=0 only terminates if i has a signed
 * type -- confirm its declaration.
 */
307 void cast5_dec(cast5_ctx_t *s, void* block){
/* round-function table, indexed by round number modulo 3 */
310 cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
311 l=((uint32_t*)block)[0];
312 r=((uint32_t*)block)[1];
313 rounds = (s->shortkey?12:16);
314 for (i=rounds-1; i>=0 ;--i){
/* rotation amount: bit (i & 7) of roth[i>>3] contributes 0x10; the nibble
 * of rotl[i>>1] (high nibble for odd i) contributes the low four bits */
316 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
317 (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
318 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
319 r = l ^ CHANGE_ENDIAN32(y);
/* the two halves are written back exchanged */
322 ((uint32_t*)block)[0]=r;
323 ((uint32_t*)block)[1]=l;
327 /*********************************************************************************************************/
328 /*********************************************************************************************************/
329 /*********************************************************************************************************/
333 void cast5_old_init(cast5_ctx_t* s, uint8_t* key, uint8_t keylength){
334 /* we might return whether the key is valid and whether setup was successful */
335 uint32_t x[4], z[4], t;
336 #define BPX ((uint8_t*)&(x[0]))
337 #define BPZ ((uint8_t*)&(z[0]))
338 s->shortkey = (keylength<=80);
339 /* little endian only! */
340 memset(&(x[0]), 0 ,16); /* set x to zero */
341 memcpy(&(x[0]), key, keylength/8);
344 /* todo: merge a and b and compress the whole stuff */
346 z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
347 z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
348 z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
349 z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
351 s->mask[0] = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
352 s->mask[1] = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
353 s->mask[2] = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
354 s->mask[3] = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
356 x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
357 x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
358 x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
359 x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
361 s->mask[4] = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
362 s->mask[5] = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
363 s->mask[6] = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
364 s->mask[7] = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
366 z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
367 z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
368 z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
369 z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
371 s->mask[8] = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
372 s->mask[9] = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
373 s->mask[10] = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
374 s->mask[11] = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
376 x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
377 x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
378 x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
379 x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
381 s->mask[12] = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
382 s->mask[13] = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
383 s->mask[14] = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
384 s->mask[15] = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
386 /* that were the masking keys, now the rotation keys */
387 /* set the keys to zero */
388 memset(&(s->rotl[0]),0,8);
389 s->roth[0]=s->roth[1]=0;
391 z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
392 z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
393 z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
394 z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
396 t = S_5Z(0x8) ^ S_6Z(0x9) ^ S_7Z(0x7) ^ S_8Z(0x6) ^ S_5Z(0x2);
398 s->rotl[0] |= t & 0x0f;
399 s->roth[0] |= (t >> 4) & (1<<0);
400 t = S_5Z(0xA) ^ S_6Z(0xB) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_6Z(0x6);
402 s->rotl[0] |= (t<<4) & 0xf0;
403 s->roth[0] |= (t >> 3) & (1<<1);
404 t = S_5Z(0xC) ^ S_6Z(0xD) ^ S_7Z(0x3) ^ S_8Z(0x2) ^ S_7Z(0x9);
406 s->rotl[1] |= t & 0x0f;
407 s->roth[0] |= (t >> 2) & (1<<2);
408 t = S_5Z(0xE) ^ S_6Z(0xF) ^ S_7Z(0x1) ^ S_8Z(0x0) ^ S_8Z(0xC);
410 s->rotl[1] |= (t<<4) & 0xf0;
411 s->roth[0] |= (t >> 1) & (1<<3);
413 x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
414 x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
415 x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
416 x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
418 t = S_5X(0x3) ^ S_6X(0x2) ^ S_7X(0xC) ^ S_8X(0xD) ^ S_5X(0x8);
420 s->rotl[2] |= t & 0x0f;
421 s->roth[0] |= t & (1<<4);
422 t = S_5X(0x1) ^ S_6X(0x0) ^ S_7X(0xE) ^ S_8X(0xF) ^ S_6X(0xD);
424 s->rotl[2] |= (t<<4) & 0xf0;
425 s->roth[0] |= (t<<1) & (1<<5);
426 t = S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x8) ^ S_8X(0x9) ^ S_7X(0x3);
428 s->rotl[3] |= t & 0x0f;
429 s->roth[0] |= (t<<2) & (1<<6);
430 t = S_5X(0x5) ^ S_6X(0x4) ^ S_7X(0xA) ^ S_8X(0xB) ^ S_8X(0x7);
432 s->rotl[3] |= (t<<4) & 0xf0;
433 s->roth[0] |= (t<<3) & (1<<7);
435 z[0] = x[0] ^ S_5X(0xD) ^ S_6X(0xF) ^ S_7X(0xC) ^ S_8X(0xE) ^ S_7X(0x8);
436 z[1] = x[2] ^ S_5Z(0x0) ^ S_6Z(0x2) ^ S_7Z(0x1) ^ S_8Z(0x3) ^ S_8X(0xA);
437 z[2] = x[3] ^ S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x5) ^ S_8Z(0x4) ^ S_5X(0x9);
438 z[3] = x[1] ^ S_5Z(0xA) ^ S_6Z(0x9) ^ S_7Z(0xB) ^ S_8Z(0x8) ^ S_6X(0xB);
440 t = S_5Z(0x3) ^ S_6Z(0x2) ^ S_7Z(0xC) ^ S_8Z(0xD) ^ S_5Z(0x9);
442 s->rotl[4] |= t & 0x0f;
443 s->roth[1] |= (t>>4) & (1<<0);
444 t = S_5Z(0x1) ^ S_6Z(0x0) ^ S_7Z(0xE) ^ S_8Z(0xF) ^ S_6Z(0xC);
446 s->rotl[4] |= (t<<4) & 0xf0;
447 s->roth[1] |= (t>>3) & (1<<1);
448 t = S_5Z(0x7) ^ S_6Z(0x6) ^ S_7Z(0x8) ^ S_8Z(0x9) ^ S_7Z(0x2);
450 s->rotl[5] |= t & 0x0f;
451 s->roth[1] |= (t>>2) & (1<<2);
452 t = S_5Z(0x5) ^ S_6Z(0x4) ^ S_7Z(0xA) ^ S_8Z(0xB) ^ S_8Z(0x6);
454 s->rotl[5] |= (t<<4) & 0xf0;
455 s->roth[1] |= (t>>1) & (1<<3);
457 x[0] = z[2] ^ S_5Z(0x5) ^ S_6Z(0x7) ^ S_7Z(0x4) ^ S_8Z(0x6) ^ S_7Z(0x0);
458 x[1] = z[0] ^ S_5X(0x0) ^ S_6X(0x2) ^ S_7X(0x1) ^ S_8X(0x3) ^ S_8Z(0x2);
459 x[2] = z[1] ^ S_5X(0x7) ^ S_6X(0x6) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_5Z(0x1);
460 x[3] = z[3] ^ S_5X(0xA) ^ S_6X(0x9) ^ S_7X(0xB) ^ S_8X(0x8) ^ S_6Z(0x3);
462 t = S_5X(0x8) ^ S_6X(0x9) ^ S_7X(0x7) ^ S_8X(0x6) ^ S_5X(0x3);
464 s->rotl[6] |= t & 0x0f;
465 s->roth[1] |= t & (1<<4);
466 t = S_5X(0xA) ^ S_6X(0xB) ^ S_7X(0x5) ^ S_8X(0x4) ^ S_6X(0x7);
468 s->rotl[6] |= (t<<4) & 0xf0;
469 s->roth[1] |= (t<<1) & (1<<5);
470 t = S_5X(0xC) ^ S_6X(0xD) ^ S_7X(0x3) ^ S_8X(0x2) ^ S_7X(0x8);
472 s->rotl[7] |= t & 0x0f;
473 s->roth[1] |= (t<<2) & (1<<6);
474 t = S_5X(0xE) ^ S_6X(0xF) ^ S_7X(0x1) ^ S_8X(0x0) ^ S_8X(0xD);
476 s->rotl[7] |= (t<<4) & 0xf0;
477 s->roth[1] |= (t<<3) & (1<<7);