3 This file is part of the ARM-Crypto-Lib.
4 Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 * \email daniel.otte@rub.de
26 * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
41 #include "cast5-sbox.h"
/*
 * Shorthand table lookups: Sn(x) yields entry number x of table sn.
 * The tables s5..s8 are not defined in the visible part of this file
 * (presumably they are provided by cast5-sbox.h - TODO confirm).
 */
45 #define S5(x) (s5[(x)])
46 #define S6(x) (s6[(x)])
47 #define S7(x) (s7[(x)])
48 #define S8(x) (s8[(x)])
/*
 * cast5_init_A: key-schedule helper that fills the 16 bytes at dest, one
 * 32-bit word at a time, from the 16 bytes at src.
 *
 *   dest  - output buffer; bytes 0x0..0xF are written through uint32_t
 *           pointer casts, so it must be suitably aligned for 32-bit access
 *   src   - input buffer; bytes 0x0..0xF are read, words through uint32_t
 *           pointer casts (same alignment requirement)
 *   bmode - when true every src index is XORed with 0x8, i.e. the two 8-byte
 *           halves of src trade places for addressing purposes
 *
 * Words 1 and 2 are computed from bytes of dest written by the preceding
 * statements, so the statement order matters.
 *
 * NOTE(review): this listing appears to be missing lines (the tail of each
 * XOR chain and the closing brace are not visible); the comments describe
 * only the visible terms.
 */
51 void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
52 uint8_t mask = bmode?0x8:0;
/* word 0: src word at offset 0x0^mask, mixed with lookups on src bytes */
53 *((uint32_t*)(&dest[0x0])) = *((uint32_t*)(&src[0x0^mask]))
54 ^ S5(src[0xD^mask]) ^ S6(src[0xF^mask])
55 ^ S7(src[0xC^mask]) ^ S8(src[0xE^mask])
/* word 1: src word at offset 0x8^mask, mixed with lookups on dest bytes 0..3 */
57 *((uint32_t*)(&dest[0x4])) = *((uint32_t*)(&src[0x8^mask]))
58 ^ S5(dest[0x0]) ^ S6(dest[0x2])
59 ^ S7(dest[0x1]) ^ S8(dest[0x3])
/* word 2: src word at offset 0xC^mask, mixed with lookups on dest bytes 4..7 */
61 *((uint32_t*)(&dest[0x8])) = *((uint32_t*)(&src[0xC^mask]))
62 ^ S5(dest[0x7]) ^ S6(dest[0x6])
63 ^ S7(dest[0x5]) ^ S8(dest[0x4])
/* word 3: starts from the src word at offset 0x4^mask (remaining terms not visible) */
65 *((uint32_t*)(&dest[0xC])) = *((uint32_t*)(&src[0x4^mask]))
/*
 * cast5_init_M: key-schedule helper that writes four 32-bit words (16 bytes)
 * to dest. Each word is the XOR of five table lookups indexed by bytes of src.
 *
 *   dest  - output buffer, written through uint32_t pointer casts (must be
 *           suitably aligned for 32-bit access)
 *   src   - 16-byte input block supplying the lookup indices
 *   nmode - when true, the byte index used by NMT() is remapped through nmt[]
 *   xmode - together with nmode selects the row of xmt[] used by XMT():
 *           row = (xmode<<1) + nmode
 *
 * NOTE(review): parts of the nmt[]/xmt[] initializers and the closing brace
 * are not visible in this listing.
 */
74 void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
75 uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8,
78 0x7, 0x6, 0x5, 0x4}; /* nmode table */
79 uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC},
82 {0x9, 0xC, 0x2, 0x6}};
/* NMT(x): byte of src at index x, or at nmt[x] when nmode is set */
83 #define NMT(x) (src[nmode?nmt[(x)]:(x)])
/* XMT(x): byte of src at the index stored in column x of the selected xmt row */
84 #define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])
/* four NMT-indexed lookups per word plus one XMT-indexed lookup (S5, S6, S7, S8 in turn) */
85 *((uint32_t*)(&dest[0x0])) = S5(NMT(0x8)) ^ S6(NMT(0x9)) ^ S7(NMT(0x7)) ^ S8(NMT(0x6)) ^ S5(XMT(0));
86 *((uint32_t*)(&dest[0x4])) = S5(NMT(0xA)) ^ S6(NMT(0xB)) ^ S7(NMT(0x5)) ^ S8(NMT(0x4)) ^ S6(XMT(1));
87 *((uint32_t*)(&dest[0x8])) = S5(NMT(0xC)) ^ S6(NMT(0xD)) ^ S7(NMT(0x3)) ^ S8(NMT(0x2)) ^ S7(XMT(2));
88 *((uint32_t*)(&dest[0xC])) = S5(NMT(0xE)) ^ S6(NMT(0xF)) ^ S7(NMT(0x1)) ^ S8(NMT(0x0)) ^ S8(XMT(3));
/*
 * Byte-wide table lookups: SnB(x) reads the single byte located at address
 * offset 3 inside entry x of table sn. Which significance that byte has
 * depends on how the table entries are stored (not visible here).
 */
91 #define S5B(x) *(3+(uint8_t*)(&s5[(x)]))
92 #define S6B(x) *(3+(uint8_t*)(&s6[(x)]))
93 #define S7B(x) *(3+(uint8_t*)(&s7[(x)]))
94 #define S8B(x) *(3+(uint8_t*)(&s8[(x)]))
/*
 * cast5_init_rM: rotation-key variant of cast5_init_M. For each of four
 * values it computes one byte t from the byte-wide lookups S5B..S8B and
 * stores it in packed form:
 *   - the low 4 bits of t go into a nibble of klo[offset*2] or
 *     klo[offset*2+1] (low nibble first, then high nibble);
 *   - bit 4 of t (mask 0x10) is collected in h, which is finally ORed into
 *     the low or high nibble of khi[offset>>1], depending on offset&1.
 *
 *   klo, khi - packed outputs; they are only ORed into, so the caller has to
 *              clear them beforehand (cast5_init does so)
 *   offset   - selects the pair of klo bytes and the khi nibble to fill
 *   src      - 16-byte input block supplying the lookup indices
 *   nmode, xmode - index-selection switches used by the NMT()/XMT() macros
 *
 * NMT()/XMT() are the macros defined inside cast5_init_M; they pick up the
 * local src, nmt, xmt, nmode and xmode of this function.
 *
 * NOTE(review): the declarations of t and h, some of the steps that update
 * h, the guard around the cli_* debug output and the closing brace are not
 * visible in this listing.
 */
97 void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
98 uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
99 uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
/* first value: low nibble of klo[offset*2] */
101 t = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
102 klo[offset*2] |= (t & 0x0f);
103 h |= (t&0x10); h>>=1;
/* second value: high nibble of klo[offset*2] */
104 t = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
105 klo[offset*2] |= (t<<4) & 0xf0;
/* third value: low nibble of klo[offset*2+1] */
107 t = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
108 klo[offset*2+1] |= t&0xf;
/* fourth value: high nibble of klo[offset*2+1] */
110 t = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
111 klo[offset*2+1] |= t<<4;
114 cli_putstr("\r\n\t h="); cli_hexdump(&h,1);
/* even offsets fill the low nibble of khi[offset>>1], odd offsets the high nibble */
116 khi[offset>>1] |= h<<((offset&0x1)?4:0);
/*
 * Table lookups indexed by byte number s of BPX (S_nX) or BPZ (S_nZ).
 * BPX and BPZ are defined inside cast5_init as byte views of its local
 * buffers x and z. These macros are not referenced in the visible part of
 * this file.
 */
119 #define S_5X(s) (s5[BPX[(s)]])
120 #define S_6X(s) (s6[BPX[(s)]])
121 #define S_7X(s) (s7[BPX[(s)]])
122 #define S_8X(s) (s8[BPX[(s)]])
124 #define S_5Z(s) (s5[BPZ[(s)]])
125 #define S_6Z(s) (s6[BPZ[(s)]])
126 #define S_7Z(s) (s7[BPZ[(s)]])
127 #define S_8Z(s) (s8[BPZ[(s)]])
/*
 * cast5_init: runs the key schedule and fills the context *s.
 *
 *   key         - key material; (keylength_b+7)/8 bytes are copied from it
 *   keylength_b - key length, treated as a bit count (rounded up to whole
 *                 bytes for the copy); values above 128 are special-cased
 *                 (the statement that handles them is not visible here)
 *   s           - context to fill: shortkey, mask[], rotl[] and roth[]
 *
 * Steps visible in this listing:
 *   1. s->shortkey is set when keylength_b <= 80 (cast5_enc/cast5_dec then
 *      run 12 instead of 16 rounds).
 *   2. The key is copied into the zeroed 16-byte buffer x, so shorter keys
 *      are zero-padded.
 *   3. Four cast5_init_A / cast5_init_M pairs, alternating between the
 *      buffers x and z, fill s->mask starting at indices 0, 4, 8 and 12.
 *   4. s->rotl (8 bytes) and s->roth[0..1] are cleared, then four
 *      cast5_init_A / cast5_init_rM pairs OR the packed rotation keys into
 *      them at offsets 0..3.
 *
 * NOTE(review): the declarations of x and z and the closing brace are not
 * visible in this listing.
 */
132 void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s){
133 /* we might return whether the key is valid and whether setup was successful */
135 #define BPX ((uint8_t*)&(x[0]))
136 #define BPZ ((uint8_t*)&(z[0]))
137 s->shortkey = (keylength_b<=80);
138 /* little-endian only! */
139 memset(&(x[0]), 0 ,16); /* set x to zero */
140 if(keylength_b > 128)
142 memcpy(&(x[0]), key, (keylength_b+7)/8);
145 /* todo: merge a and b and compress the whole stuff */
/* masking keys: s->mask[0..3] */
147 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
149 cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
/* masking keys: s->mask[4..7] */
151 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
153 cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
/* masking keys: s->mask[8..11] */
155 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
157 cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
/* masking keys: s->mask[12..15] */
159 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
161 cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
163 /* those were the masking keys, now the rotation keys */
164 /* set the keys to zero */
165 memset(&(s->rotl[0]),0,8);
166 s->roth[0]=s->roth[1]=0;
/* rotation keys, offset 0 */
168 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
170 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
/* rotation keys, offset 1 */
172 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
174 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
/* rotation keys, offset 2 */
176 cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
178 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
/* rotation keys, offset 3 */
180 cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
182 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
188 /********************************************************************************************************/
/*
 * ROTL32(a,n): rotate the 32-bit value a left by n bit positions.
 *
 * Both shift counts are reduced modulo 32. A rotation amount of 0 is legal
 * for CAST5 (the round code builds amounts in the range 0..31); without the
 * reduction it would expand to a right shift by 32, which is undefined
 * behaviour for a 32-bit operand in C. The operand is converted to uint32_t
 * first so the result does not depend on integer promotion.
 *
 * a and n are each evaluated twice: pass side-effect-free expressions.
 */
#define ROTL32(a,n) ((uint32_t)(a) << ((n) & 31u) | (uint32_t)(a) >> ((32u - (n)) & 31u))

/*
 * CHANGE_ENDIAN32(x): reverse the byte order of the 32-bit value x.
 *
 * x is evaluated four times: pass side-effect-free expressions.
 */
#define CHANGE_ENDIAN32(x) ((uint32_t)(x) << 24 | (uint32_t)(x) >> 24 | ((uint32_t)(x) & 0xff00u) << 8 | ((uint32_t)(x) & 0xff0000u) >> 8)
/*
 * Function type shared by the round functions cast5_f1, cast5_f2 and
 * cast5_f3: (data word, masking key, rotation amount) -> 32-bit result.
 * cast5_enc and cast5_dec use it for their round-function tables.
 */
193 typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);
/*
 * cast5_f1: round function of type 1 (cf. RFC 2144, section 2.2).
 *
 *   d - data word
 *   m - masking key
 *   r - rotation amount
 *
 * Computes t = ROTL32(d + m, r), uses the four bytes of t (selected by the
 * byte positions IA, IB, IC, ID, which are defined outside the visible part
 * of this file) as indices into s1..s4 and returns
 *   ((s1[.] ^ s2[.]) - s3[.]) + s4[.]
 *
 * Two equivalent return paths are visible: one that goes through the
 * temporaries ia..id and prints them with the cli_* helpers, and one without
 * any output.
 *
 * NOTE(review): the preprocessor conditionals that choose between the two
 * paths (presumably a DEBUG switch), the declaration of t and the closing
 * brace are not visible in this listing.
 */
201 uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
203 t = ROTL32((d + m),r);
/* variant with diagnostic output */
205 uint32_t ia,ib,ic,id;
206 cli_putstr("\r\n f1("); cli_hexdump(&d, 4); cli_putc(',');
207 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
209 ia = s1[((uint8_t*)&t)[IA]];
210 ib = s2[((uint8_t*)&t)[IB]];
211 ic = s3[((uint8_t*)&t)[IC]];
212 id = s4[((uint8_t*)&t)[ID]];
213 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
214 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
215 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
216 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
218 return (((ia ^ ib) - ic) + id);
/* variant without output: same expression, lookups inlined */
222 return ((( s1[((uint8_t*)&t)[IA]]
223 ^ s2[((uint8_t*)&t)[IB]] )
224 - s3[((uint8_t*)&t)[IC]] )
225 + s4[((uint8_t*)&t)[ID]] );
/*
 * cast5_f2: round function of type 2 (cf. RFC 2144, section 2.2).
 *
 *   d - data word
 *   m - masking key
 *   r - rotation amount
 *
 * Computes t = ROTL32(d ^ m, r), uses the four bytes of t (selected by the
 * byte positions IA, IB, IC, ID, which are defined outside the visible part
 * of this file) as indices into s1..s4 and returns
 *   ((s1[.] - s2[.]) + s3[.]) ^ s4[.]
 *
 * Two equivalent return paths are visible: one that goes through the
 * temporaries ia..id and prints them with the cli_* helpers, and one without
 * any output.
 *
 * NOTE(review): the preprocessor conditionals that choose between the two
 * paths (presumably a DEBUG switch), the declaration of t and the closing
 * brace are not visible in this listing.
 */
231 uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
233 t = ROTL32((d ^ m),r);
/* variant with diagnostic output */
235 uint32_t ia,ib,ic,id;
236 cli_putstr("\r\n f2("); cli_hexdump(&d, 4); cli_putc(',');
237 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
240 ia = s1[((uint8_t*)&t)[IA]];
241 ib = s2[((uint8_t*)&t)[IB]];
242 ic = s3[((uint8_t*)&t)[IC]];
243 id = s4[((uint8_t*)&t)[ID]];
245 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
246 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
247 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
248 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
250 return (((ia - ib) + ic) ^ id);
/* variant without output: same expression, lookups inlined */
253 return ((( s1[((uint8_t*)&t)[IA]]
254 - s2[((uint8_t*)&t)[IB]] )
255 + s3[((uint8_t*)&t)[IC]] )
256 ^ s4[((uint8_t*)&t)[ID]] );
/*
 * cast5_f3: round function of type 3 (cf. RFC 2144, section 2.2).
 *
 *   d - data word
 *   m - masking key
 *   r - rotation amount
 *
 * Computes t = ROTL32(m - d, r), uses the four bytes of t (selected by the
 * byte positions IA, IB, IC, ID, which are defined outside the visible part
 * of this file) as indices into s1..s4 and returns
 *   ((s1[.] + s2[.]) ^ s3[.]) - s4[.]
 *
 * Two equivalent return paths are visible: one that goes through the
 * temporaries ia..id and prints them with the cli_* helpers, and one without
 * any output.
 *
 * NOTE(review): the preprocessor conditionals that choose between the two
 * paths (presumably a DEBUG switch), the declaration of t and the closing
 * brace are not visible in this listing.
 */
262 uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
264 t = ROTL32((m - d),r);
/* variant with diagnostic output */
267 uint32_t ia,ib,ic,id;
269 cli_putstr("\r\n f3("); cli_hexdump(&d, 4); cli_putc(',');
270 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
273 ia = s1[((uint8_t*)&t)[IA]];
274 ib = s2[((uint8_t*)&t)[IB]];
275 ic = s3[((uint8_t*)&t)[IC]];
276 id = s4[((uint8_t*)&t)[ID]];
278 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
279 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
280 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
281 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
282 return (((ia + ib) ^ ic) - id);
/* variant without output: same expression, lookups inlined */
284 return (( s1[((uint8_t*)&t)[IA]]
285 + s2[((uint8_t*)&t)[IB]])
286 ^ s3[((uint8_t*)&t)[IC]])
287 - s4[((uint8_t*)&t)[ID]];
292 /******************************************************************************/
/*
 * cast5_enc: encrypts one block in place.
 *
 *   block - two consecutive 32-bit words, read and written through uint32_t
 *           pointer casts (must be suitably aligned); word 0 is loaded into
 *           l, word 1 into r
 *   s     - context prepared by cast5_init
 *
 * Runs 12 rounds when s->shortkey is set, otherwise 16. Round i
 *   - uses round function f[i%3], i.e. cast5_f1, cast5_f2, cast5_f3 in turn;
 *   - passes the byte-swapped r and the byte-swapped masking key s->mask[i];
 *   - builds the rotation amount from the packed rotation keys: bit i of
 *     s->roth (byte i>>3, bit i&7) contributes 0x10, and the nibble of
 *     s->rotl[i>>1] (high nibble for odd i) contributes the low four bits;
 *   - sets r to l XOR the byte-swapped result.
 * Afterwards r is stored to word 0 and l to word 1, i.e. the halves are
 * written back swapped.
 *
 * NOTE(review): the local declarations, part of the loop body (including
 * any update of l) and the closing braces are not visible in this listing.
 */
294 void cast5_enc(void* block, const cast5_ctx_t *s){
297 cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
298 l=((uint32_t*)block)[0];
299 r=((uint32_t*)block)[1];
300 // cli_putstr("\r\n round[-1] = ");
301 // cli_hexdump(&r, 4);
302 for (i=0;i<(s->shortkey?12:16);++i){
304 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
305 (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
306 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
307 r = l ^ CHANGE_ENDIAN32(y);
308 // cli_putstr("\r\n round["); DEBUG_B(i); cli_putstr("] = ");
309 // cli_hexdump(&r, 4);
/* write the halves back in swapped order */
312 ((uint32_t*)block)[0]=r;
313 ((uint32_t*)block)[1]=l;
316 /******************************************************************************/
/*
 * cast5_dec: decrypts one block in place.
 *
 *   block - two consecutive 32-bit words, read and written through uint32_t
 *           pointer casts (must be suitably aligned); word 0 is loaded into
 *           l, word 1 into r
 *   s     - context prepared by cast5_init
 *
 * Same round computation as cast5_enc, but the round index runs downwards
 * from rounds-1 to 0, where rounds is 12 when s->shortkey is set and 16
 * otherwise. Round i uses f[i%3], the byte-swapped s->mask[i] and the
 * rotation amount unpacked from s->roth / s->rotl (bit i of roth adds 0x10,
 * the nibble of rotl[i>>1] gives the low four bits). Afterwards r is stored
 * to word 0 and l to word 1.
 *
 * NOTE(review): the loop condition i>=0 only terminates if i has a signed
 * type; the declarations are not visible in this listing - confirm. Part of
 * the loop body (including any update of l) and the closing braces are not
 * visible either.
 */
318 void cast5_dec(void* block, const cast5_ctx_t *s){
321 cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
322 l=((uint32_t*)block)[0];
323 r=((uint32_t*)block)[1];
324 rounds = (s->shortkey?12:16);
325 for (i=rounds-1; i>=0 ;--i){
327 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
328 (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
329 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
330 r = l ^ CHANGE_ENDIAN32(y);
/* write the halves back in swapped order */
333 ((uint32_t*)block)[0]=r;
334 ((uint32_t*)block)[1]=l;
338 /******************************************************************************/