3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 * \email daniel.otte@rub.de
26 * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
33 #include <avr/pgmspace.h>
41 #include "cast5-sbox.h"
/* Fetch one 32-bit entry from the tables s5..s8 with pgm_read_dword. The
 * tables are not defined in the visible part of this file; presumably they
 * come from cast5-sbox.h (confirm). */
43 #define S5(x) pgm_read_dword(&s5[(x)])
44 #define S6(x) pgm_read_dword(&s6[(x)])
45 #define S7(x) pgm_read_dword(&s7[(x)])
46 #define S8(x) pgm_read_dword(&s8[(x)])
/**
 * Derive a 16-byte block in dest from the 16-byte block src, one 32-bit word
 * at a time. Each word is a 32-bit word of src XORed with lookups in the
 * S5..S8 tables.
 *
 * \param dest  output; written as 32-bit words at byte offsets 0x0, 0x4, 0x8
 *              and 0xC. Bytes already written to dest are read back as table
 *              indexes for the following words (the callers visible in this
 *              file always pass two distinct buffers for dest and src).
 * \param src   input; read at byte indexes 0x0..0xF
 * \param bmode when true, every src index is XORed with 0x8, i.e. the two
 *              8-byte halves of src are read in swapped order
 *
 * NOTE(review): this listing appears to have lost lines inside this function
 * (the braces and part of the final statement); verify against the full file.
 */
49 void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode)
/* 0x8 flips bit 3 of every src index below; 0 leaves the indexes unchanged */
51 uint8_t mask = bmode ? 0x8 : 0;
/* dest bytes 0x0..0x3: indexes are taken from src only */
52 *((uint32_t*) (&dest[0x0])) = *((uint32_t*) (&src[0x0 ^ mask]))
53 ^ S5(src[0xD ^ mask]) ^ S6(src[0xF ^ mask])
54 ^ S7(src[0xC ^ mask]) ^ S8(src[0xE ^ mask])
55 ^ S7(src[0x8 ^ mask]);
/* dest bytes 0x4..0x7: indexed by the dest bytes 0x0..0x3 written just above */
56 *((uint32_t*) (&dest[0x4])) = *((uint32_t*) (&src[0x8 ^ mask]))
57 ^ S5(dest[0x0]) ^ S6(dest[0x2])
58 ^ S7(dest[0x1]) ^ S8(dest[0x3])
59 ^ S8(src[0xA ^ mask]);
/* dest bytes 0x8..0xB: indexed by the dest bytes 0x4..0x7 written just above */
60 *((uint32_t*) (&dest[0x8])) = *((uint32_t*) (&src[0xC ^ mask]))
61 ^ S5(dest[0x7]) ^ S6(dest[0x6])
62 ^ S7(dest[0x5]) ^ S8(dest[0x4])
63 ^ S5(src[0x9 ^ mask]);
/* dest bytes 0xC..0xF.
 * NOTE(review): the middle terms of this expression are not visible in this
 * listing; compare with the key schedule in RFC 2144 before relying on it. */
64 *((uint32_t*) (&dest[0xC])) = *((uint32_t*) (&src[0x4 ^ mask]))
69 ^ S6(src[0xB ^ mask]);
/**
 * Write four 32-bit words to dest (byte offsets 0x0, 0x4, 0x8, 0xC). Each
 * word is the XOR of five dword lookups in the S5..S8 tables, indexed by
 * bytes of src.
 *
 * \param dest  output buffer, 16 bytes are written
 * \param src   input block whose bytes serve as table indexes
 * \param nmode when true, the src index used by NMT(x) is remapped through
 *              the nmt table instead of being x itself; it also supplies the
 *              low bit of the xmt row number
 * \param xmode supplies the high bit of the xmt row number
 */
73 void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode)
/* NOTE(review): only 8 entries of nmt are visible in this listing, yet NMT()
 * is used below with indexes up to 0xF. The same table in cast5_init_rM has
 * 16 entries, so rows are probably missing here -- verify against the full
 * file. */
75 uint8_t nmt[] = { 0xB, 0xA, 0x9, 0x8,
78 0x7, 0x6, 0x5, 0x4 }; /* nmode table */
/* src indexes for the fifth lookup of each word; one row per
 * (xmode, nmode) combination, one column per output word */
79 uint8_t xmt[4][4] = { { 0x2, 0x6, 0x9, 0xC },
80 { 0x8, 0xD, 0x3, 0x7 },
81 { 0x3, 0x7, 0x8, 0xD },
82 { 0x9, 0xC, 0x2, 0x6 } };
/* Both macros expand to expressions over the locals src, nmode, xmode, nmt
 * and xmt, so they only work where such names are in scope. They are not
 * undefined in the visible code and are reused by cast5_init_rM. */
83 #define NMT(x) (src[nmode?nmt[(x)]:(x)])
84 #define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])
85 *((uint32_t*) (&dest[0x0])) =
86 S5(NMT(0x8)) ^ S6(NMT(0x9)) ^ S7(NMT(0x7)) ^ S8(NMT(0x6)) ^ S5(XMT(0));
87 *((uint32_t*) (&dest[0x4])) =
88 S5(NMT(0xA)) ^ S6(NMT(0xB)) ^ S7(NMT(0x5)) ^ S8(NMT(0x4)) ^ S6(XMT(1));
89 *((uint32_t*) (&dest[0x8])) =
90 S5(NMT(0xC)) ^ S6(NMT(0xD)) ^ S7(NMT(0x3)) ^ S8(NMT(0x2)) ^ S7(XMT(2));
91 *((uint32_t*) (&dest[0xC])) =
92 S5(NMT(0xE)) ^ S6(NMT(0xF)) ^ S7(NMT(0x1)) ^ S8(NMT(0x0)) ^ S8(XMT(3));
/* Byte-sized variants of S5..S8: read only the byte at offset 3 of the
 * selected 32-bit table entry (the most significant byte when the entry is
 * stored little endian). */
95 #define S5B(x) pgm_read_byte(3+(uint8_t*)(&s5[(x)]))
96 #define S6B(x) pgm_read_byte(3+(uint8_t*)(&s6[(x)]))
97 #define S7B(x) pgm_read_byte(3+(uint8_t*)(&s7[(x)]))
98 #define S8B(x) pgm_read_byte(3+(uint8_t*)(&s8[(x)]))
/**
 * Byte-wide counterpart of cast5_init_M: evaluates the same four index
 * patterns with the single-byte lookups S5B..S8B and ORs the results into
 * the packed arrays klo and khi.
 *
 * \param klo    packed low bits; klo[offset*2] and klo[offset*2 + 1] each
 *               receive two 4-bit values (low nibble first). Values are ORed
 *               in, so the bytes must be zero beforehand.
 * \param khi    packed high bits; h is ORed into khi[offset >> 1], shifted
 *               left by 4 when offset is odd
 * \param offset selects the group of bytes written in klo and khi
 * \param src    input block whose bytes serve as table indexes
 * \param nmode  see NMT()
 * \param xmode  see XMT()
 *
 * NOTE(review): the declarations of t and h, the assignments to t and the
 * code that builds h are not visible in this listing; verify against the
 * full file.
 */
101 void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src,
102 bool nmode, bool xmode)
104 uint8_t nmt[] = { 0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1,
105 0x0, 0x7, 0x6, 0x5, 0x4 }; /* nmode table */
106 uint8_t xmt[4][4] = { { 0x2, 0x6, 0x9, 0xC }, { 0x8, 0xD, 0x3, 0x7 }, { 0x3,
107 0x7, 0x8, 0xD }, { 0x9, 0xC, 0x2, 0x6 } };
/* first value (presumably assigned to t on a line missing from this listing) */
110 S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
/* low 4 bits of t go to the low nibble of klo[offset*2] */
111 klo[offset * 2] |= (t & 0x0f);
/* second value */
115 S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
/* low 4 bits of t go to the high nibble of klo[offset*2] */
116 klo[offset * 2] |= (t << 4) & 0xf0;
/* third value */
120 S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
/* low 4 bits of t go to the low nibble of klo[offset*2 + 1] */
121 klo[offset * 2 + 1] |= t & 0xf;
/* fourth value */
125 S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
/* t shifted left by 4 is ORed into klo[offset*2 + 1] */
126 klo[offset * 2 + 1] |= t << 4;
/* debug output of h; presumably guarded by a preprocessor conditional that
 * is not visible in this listing */
130 cli_putstr("\r\n\t h="); cli_hexdump(&h,1);
/* even offsets place h at bit 0 of khi[offset >> 1], odd offsets at bit 4 */
132 khi[offset >> 1] |= h << ((offset & 0x1) ? 4 : 0);
/* Dword lookups in s5..s8 indexed by byte s of BPX or BPZ. BPX and BPZ are
 * defined further down, inside cast5_init, as byte views of the local
 * buffers x and z. None of these eight macros is referenced in the visible
 * part of this file. */
135 #define S_5X(s) pgm_read_dword(&s5[BPX[(s)]])
136 #define S_6X(s) pgm_read_dword(&s6[BPX[(s)]])
137 #define S_7X(s) pgm_read_dword(&s7[BPX[(s)]])
138 #define S_8X(s) pgm_read_dword(&s8[BPX[(s)]])
140 #define S_5Z(s) pgm_read_dword(&s5[BPZ[(s)]])
141 #define S_6Z(s) pgm_read_dword(&s6[BPZ[(s)]])
142 #define S_7Z(s) pgm_read_dword(&s7[BPZ[(s)]])
143 #define S_8Z(s) pgm_read_dword(&s8[BPZ[(s)]])
/**
 * Key setup: fills the context s from the user key.
 *
 * The key is copied into a zeroed 16-byte buffer x. The buffers x and z are
 * then transformed into each other by alternating cast5_init_A calls. After
 * each of the first four calls cast5_init_M stores 16 bytes of masking key
 * starting at s->mask[0], [4], [8] and [12]. After each of the next four
 * calls cast5_init_rM packs rotation-key bits into s->rotl and s->roth.
 *
 * \param key         pointer to the key material
 * \param keylength_b key length in bits; (keylength_b + 7) / 8 bytes are
 *                    copied, and a length of at most 80 bits sets s->shortkey
 * \param s           context to initialise
 *
 * NOTE(review): the declarations of x and z and the statement controlled by
 * the "keylength_b > 128" test are not visible in this listing.
 */
145 void cast5_init(const void *key, uint16_t keylength_b, cast5_ctx_t *s)
147 /* we might return whether the key is valid and whether setup was successful */
/* byte-wise views of the working buffers x and z */
149 #define BPX ((uint8_t*)&(x[0]))
150 #define BPZ ((uint8_t*)&(z[0]))
/* cast5_enc/cast5_dec run 12 rounds instead of 16 when shortkey is set */
151 s->shortkey = (keylength_b <= 80);
152 /* little endian only! */
153 memset(&(x[0]), 0, 16); /* set x to zero */
154 if (keylength_b > 128)
/* bit length rounded up to whole bytes */
156 memcpy(&(x[0]), key, (keylength_b + 7) / 8);
158 /* todo: merge a and b and compress the whole stuff */
/* masking keys: four A/M rounds, each with a different (nmode, xmode) pair */
160 cast5_init_A((uint8_t*) (&z[0]), (uint8_t*) (&x[0]), false);
162 cast5_init_M((uint8_t*) (&(s->mask[0])), (uint8_t*) (&z[0]), false, false);
164 cast5_init_A((uint8_t*) (&x[0]), (uint8_t*) (&z[0]), true);
166 cast5_init_M((uint8_t*) (&(s->mask[4])), (uint8_t*) (&x[0]), true, false);
168 cast5_init_A((uint8_t*) (&z[0]), (uint8_t*) (&x[0]), false);
170 cast5_init_M((uint8_t*) (&(s->mask[8])), (uint8_t*) (&z[0]), true, true);
172 cast5_init_A((uint8_t*) (&x[0]), (uint8_t*) (&z[0]), true);
174 cast5_init_M((uint8_t*) (&(s->mask[12])), (uint8_t*) (&x[0]), false, true);
176 /* those were the masking keys, now the rotation keys */
177 /* set the keys to zero */
/* cast5_init_rM only ORs bits in, so both arrays must start out cleared */
178 memset(&(s->rotl[0]), 0, 8);
179 s->roth[0] = s->roth[1] = 0;
/* rotation keys: same A sequence, offsets 0..3 select the bytes filled */
181 cast5_init_A((uint8_t*) (&z[0]), (uint8_t*) (&x[0]), false);
183 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*) (&z[0]), false, false);
185 cast5_init_A((uint8_t*) (&x[0]), (uint8_t*) (&z[0]), true);
187 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*) (&x[0]), true, false);
189 cast5_init_A((uint8_t*) (&z[0]), (uint8_t*) (&x[0]), false);
191 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*) (&z[0]), true, true);
193 cast5_init_A((uint8_t*) (&x[0]), (uint8_t*) (&z[0]), true);
195 cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*) (&x[0]), false, true);
199 /********************************************************************************************************/
/**
 * Rotate the 32-bit unsigned value a left by n bits.
 *
 * Both shift counts are reduced modulo 32. Without that, n == 0 would shift
 * right by 32 bits, the full width of the operand, which is undefined
 * behaviour in C. The rotation amount passed by cast5_enc/cast5_dec can be 0.
 * Both arguments are evaluated twice, so pass expressions without side
 * effects.
 */
#define ROTL32(a,n) (((a) << ((n) & 31)) | ((a) >> ((32 - (n)) & 31)))

/**
 * Reverse the byte order of the 32-bit unsigned value x.
 * x is evaluated four times, so pass an expression without side effects.
 */
#define CHANGE_ENDIAN32(x) ((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8 )

/** Common signature of the round functions: (data, masking key, rotation). */
typedef uint32_t cast5_f_t(uint32_t, uint32_t, uint8_t);
/**
 * Round function 1: t = (d + m) rotated left by r bits; the bytes of t
 * selected by IA, IB, IC and ID index the tables s1..s4 and the result is
 * ((s1 ^ s2) - s3) + s4.
 *
 * \param d data word
 * \param m masking key
 * \param r rotation amount
 * \return  the combined table value
 *
 * Two return paths are visible: one that stores the four lookups in locals
 * and dumps them with cli_putstr/cli_hexdump, and one that returns the
 * expression directly. NOTE(review): the preprocessor conditionals that
 * presumably select between them, the declaration of t and the definitions
 * of IA..ID are not visible in this listing.
 */
211 static uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r)
214 t = ROTL32((d + m), r);
/* --- variant with debug output --- */
216 uint32_t ia,ib,ic,id;
217 cli_putstr("\r\n f1("); cli_hexdump(&d, 4); cli_putc(',');
218 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
220 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
221 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
222 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
223 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
224 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
225 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
226 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
227 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
229 return (((ia ^ ib) - ic) + id);
/* --- variant without debug output: same expression, no temporaries --- */
233 return ((( pgm_read_dword(&s1[((uint8_t*)&t)[IA]])
234 ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]]))
235 - pgm_read_dword(&s3[((uint8_t*)&t)[IC]]))
236 + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
/**
 * Round function 2: t = (d ^ m) rotated left by r bits; the bytes of t
 * selected by IA, IB, IC and ID index the tables s1..s4 and the result is
 * ((s1 - s2) + s3) ^ s4.
 *
 * \param d data word
 * \param m masking key
 * \param r rotation amount
 * \return  the combined table value
 *
 * Two return paths are visible: one with debug output through
 * cli_putstr/cli_hexdump and one without. NOTE(review): the preprocessor
 * conditionals that presumably select between them, the declaration of t
 * and the definitions of IA..ID are not visible in this listing.
 */
241 static uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r)
244 t = ROTL32((d ^ m), r);
/* --- variant with debug output --- */
246 uint32_t ia,ib,ic,id;
247 cli_putstr("\r\n f2("); cli_hexdump(&d, 4); cli_putc(',');
248 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
251 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
252 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
253 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
254 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
256 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
257 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
258 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
259 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
261 return (((ia - ib) + ic) ^ id);
/* --- variant without debug output: same expression, no temporaries --- */
264 return ((( pgm_read_dword(&s1[((uint8_t*)&t)[IA]])
265 - pgm_read_dword(&s2[((uint8_t*)&t)[IB]]))
266 + pgm_read_dword(&s3[((uint8_t*)&t)[IC]]))
267 ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]));
/**
 * Round function 3: t = (m - d) rotated left by r bits; the bytes of t
 * selected by IA, IB, IC and ID index the tables s1..s4 and the result is
 * ((s1 + s2) ^ s3) - s4.
 *
 * \param d data word
 * \param m masking key
 * \param r rotation amount
 * \return  the combined table value
 *
 * Two return paths are visible: one with debug output through
 * cli_putstr/cli_hexdump and one without. NOTE(review): the preprocessor
 * conditionals that presumably select between them, the declaration of t
 * and the definitions of IA..ID are not visible in this listing.
 */
272 static uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r)
275 t = ROTL32((m - d), r);
/* --- variant with debug output --- */
278 uint32_t ia,ib,ic,id;
280 cli_putstr("\r\n f3("); cli_hexdump(&d, 4); cli_putc(',');
281 cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
284 ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
285 ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
286 ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
287 id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
289 cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
290 cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
291 cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
292 cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
293 return (((ia + ib) ^ ic) - id);
/* --- variant without debug output: same expression, no temporaries --- */
295 return (( pgm_read_dword(&s1[((uint8_t*)&t)[IA]] )
296 + pgm_read_dword(&s2[((uint8_t*)&t)[IB]]))
297 ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]]))
298 - pgm_read_dword(&s4[((uint8_t*)&t)[ID]]);
303 /******************************************************************************/
/**
 * Encrypt one block in place.
 *
 * \param block pointer to the data; accessed as two 32-bit words
 * \param s     initialised context (mask, rotl, roth and shortkey are read)
 *
 * Runs 12 rounds when s->shortkey is set, otherwise 16, cycling through
 * cast5_f1, cast5_f2, cast5_f3. The two halves are written back in swapped
 * positions at the end.
 *
 * NOTE(review): the declarations of l, r, y and i, the statements that
 * update l inside the loop and the closing braces are not visible in this
 * listing.
 */
305 void cast5_enc(void *block, const cast5_ctx_t *s)
/* round i uses f[i % 3] */
309 cast5_f_t *f[] = { cast5_f1, cast5_f2, cast5_f3 };
310 l = ((uint32_t*) block)[0];
311 r = ((uint32_t*) block)[1];
312 // cli_putstr("\r\n round[-1] = ");
313 // cli_hexdump(&r, 4);
314 for (i = 0; i < (s->shortkey ? 12 : 16); ++i) {
/* Data and masking key are byte-swapped before the round function runs.
 * The rotation amount is rebuilt from its packed form: bit i of roth gives
 * 0x10, and the nibble of rotl[i >> 1] (high nibble for odd i, low nibble
 * for even i) gives the low four bits. */
316 y = (f[i % 3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
317 (((s->roth[i >> 3]) & (1 << (i & 0x7))) ? 0x10 : 0x00)
318 + (((s->rotl[i >> 1]) >> ((i & 1) ? 4 : 0)) & 0x0f));
/* swap the round output back before mixing it into the other half */
319 r = l ^ CHANGE_ENDIAN32(y);
320 // cli_putstr("\r\n round["); DEBUG_B(i); cli_putstr("] = ");
321 // cli_hexdump(&r, 4);
/* r goes to word 0 and l to word 1, i.e. the halves are stored swapped */
324 ((uint32_t*) block)[0] = r;
325 ((uint32_t*) block)[1] = l;
328 /******************************************************************************/
/**
 * Decrypt one block in place.
 *
 * \param block pointer to the data; accessed as two 32-bit words
 * \param s     initialised context (mask, rotl, roth and shortkey are read)
 *
 * Same round computation as cast5_enc, but the round index runs from
 * rounds - 1 down to 0, where rounds is 12 when s->shortkey is set and 16
 * otherwise.
 *
 * NOTE(review): the declarations of l, r, y, i and rounds, the statements
 * that update l inside the loop and the closing braces are not visible in
 * this listing. The loop condition "i >= 0" only terminates if i has a
 * signed type -- confirm against the full file.
 */
330 void cast5_dec(void *block, const cast5_ctx_t *s)
/* round i uses f[i % 3] */
334 cast5_f_t *f[] = { cast5_f1, cast5_f2, cast5_f3 };
335 l = ((uint32_t*) block)[0];
336 r = ((uint32_t*) block)[1];
337 rounds = (s->shortkey ? 12 : 16);
338 for (i = rounds - 1; i >= 0; --i) {
/* The rotation amount is rebuilt from its packed form: bit i of roth gives
 * 0x10, and the nibble of rotl[i >> 1] (high nibble for odd i, low nibble
 * for even i) gives the low four bits. */
340 y = (f[i % 3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]),
341 (((s->roth[i >> 3]) & (1 << (i & 0x7))) ? 0x10 : 0x00)
342 + (((s->rotl[i >> 1]) >> ((i & 1) ? 4 : 0)) & 0x0f));
343 r = l ^ CHANGE_ENDIAN32(y);
/* r goes to word 0 and l to word 1, i.e. the halves are stored swapped */
346 ((uint32_t*) block)[0] = r;
347 ((uint32_t*) block)[1] = l;
350 /******************************************************************************/