]> git.cryptolib.org Git - avr-crypto-lib.git/blob - cast5.c
bug fixed in md5-asm.S ( wrong values for length_b%512=505..511 )
[avr-crypto-lib.git] / cast5.c
1 /* cast5.c */
2 /*
3     This file is part of the AVR-Crypto-Lib.
4     Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
5
6     This program is free software: you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation, either version 3 of the License, or
9     (at your option) any later version.
10
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 /* 
20  * \file        cast5.c
21  * \author      Daniel Otte
22  * \email       daniel.otte@rub.de
23  * \date        2006-07-26
24  * \par License:
25  *  GPLv3 or later
26  * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
27  * 
28  */
29  
30  #include <stdint.h>
31  #include <string.h>
32  #include "cast5.h"
33  #include "config.h"
34  #include "debug.h"
35  
36  #undef DEBUG
37  
38  #ifdef DEBUG
39   #include "uart.h"
40  #endif
41  
42 #include "cast5-sbox.h"
43
44
45  
/* Read the 32-bit entry x of key-schedule table s5..s8 through pgm_read_dword. */
#define S5(x) pgm_read_dword(s5 + (x))
#define S6(x) pgm_read_dword(s6 + (x))
#define S7(x) pgm_read_dword(s7 + (x))
#define S8(x) pgm_read_dword(s8 + (x))
50
/*
 * One mixing step of the key schedule: fills the 16 bytes at dest from the
 * 16 bytes at src, combining src words with S5..S8 lookups.
 *
 * dest  - receives four 32-bit words (written through a uint32_t pointer)
 * src   - 16 input bytes (also read as 32-bit words)
 * bmode - when true every src byte index is XORed with 0x8, i.e. the two
 *         8-byte halves of src exchange roles
 *
 * Each output word after the first depends on dest bytes produced by the
 * previous word, so the four words must be computed in this order.
 */
static
void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
	const uint8_t flip = bmode ? 0x8 : 0;
	uint32_t *out = (uint32_t*)dest;
	uint32_t acc;

	/* word 0: depends on src only */
	acc  = *((uint32_t*)(&src[0x0^flip]));
	acc ^= S5(src[0xD^flip]);
	acc ^= S6(src[0xF^flip]);
	acc ^= S7(src[0xC^flip]);
	acc ^= S8(src[0xE^flip]);
	acc ^= S7(src[0x8^flip]);
	out[0] = acc;

	/* word 1: indexed by the bytes of word 0 */
	acc  = *((uint32_t*)(&src[0x8^flip]));
	acc ^= S5(dest[0x0]);
	acc ^= S6(dest[0x2]);
	acc ^= S7(dest[0x1]);
	acc ^= S8(dest[0x3]);
	acc ^= S8(src[0xA^flip]);
	out[1] = acc;

	/* word 2: indexed by the bytes of word 1 */
	acc  = *((uint32_t*)(&src[0xC^flip]));
	acc ^= S5(dest[0x7]);
	acc ^= S6(dest[0x6]);
	acc ^= S7(dest[0x5]);
	acc ^= S8(dest[0x4]);
	acc ^= S5(src[0x9^flip]);
	out[2] = acc;

	/* word 3: indexed by the bytes of word 2 */
	acc  = *((uint32_t*)(&src[0x4^flip]));
	acc ^= S5(dest[0xA]);
	acc ^= S6(dest[0x9]);
	acc ^= S7(dest[0xB]);
	acc ^= S8(dest[0x8]);
	acc ^= S6(src[0xB^flip]);
	out[3] = acc;
}
73
/*
 * Derives four 32-bit masking subkeys from the 16 bytes at src and stores
 * them at dest (written through a uint32_t pointer).
 *
 * nmode - when true the byte indices of the first four lookups are
 *         remapped through the nmt table
 * xmode - together with nmode selects the row of xmt that supplies the
 *         index of the fifth lookup of every word
 *
 * The NMT/XMT helper macros are defined here and stay defined for the code
 * that follows in this file.
 */
static
void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
	/* nmode table */
	uint8_t nmt[] = {
		0xB, 0xA, 0x9, 0x8,  0xF, 0xE, 0xD, 0xC,
		0x3, 0x2, 0x1, 0x0,  0x7, 0x6, 0x5, 0x4
	};
	/* index of the extra lookup, row = (xmode<<1) + nmode */
	uint8_t xmt[4][4] = {
		{0x2, 0x6, 0x9, 0xC},
		{0x8, 0xD, 0x3, 0x7},
		{0x3, 0x7, 0x8, 0xD},
		{0x9, 0xC, 0x2, 0x6}
	};
	uint32_t *subkey = (uint32_t*)dest;

	#define NMT(x) (src[nmode?nmt[(x)]:(x)])
	#define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]])

	subkey[0] = S5(NMT(0x8)) ^ S6(NMT(0x9))
	          ^ S7(NMT(0x7)) ^ S8(NMT(0x6))
	          ^ S5(XMT(0));
	subkey[1] = S5(NMT(0xA)) ^ S6(NMT(0xB))
	          ^ S7(NMT(0x5)) ^ S8(NMT(0x4))
	          ^ S6(XMT(1));
	subkey[2] = S5(NMT(0xC)) ^ S6(NMT(0xD))
	          ^ S7(NMT(0x3)) ^ S8(NMT(0x2))
	          ^ S7(XMT(2));
	subkey[3] = S5(NMT(0xE)) ^ S6(NMT(0xF))
	          ^ S7(NMT(0x1)) ^ S8(NMT(0x0))
	          ^ S8(XMT(3));
}
91
/* Read only the byte at offset 3 of entry x of table s5..s8 (pgm_read_byte). */
#define S5B(x) pgm_read_byte(((uint8_t*)(s5 + (x))) + 3)
#define S6B(x) pgm_read_byte(((uint8_t*)(s6 + (x))) + 3)
#define S7B(x) pgm_read_byte(((uint8_t*)(s7 + (x))) + 3)
#define S8B(x) pgm_read_byte(((uint8_t*)(s8 + (x))) + 3)
96
/*
 * Derives four 5-bit rotation subkeys from the 16 bytes at src and ORs them
 * into the packed rotation-key arrays.
 *
 * klo    - low nibbles: two subkeys per byte; this call touches
 *          klo[offset*2] and klo[offset*2+1]
 * khi    - bit 4 of each subkey: four bits per call, ORed into the low
 *          (even offset) or high (odd offset) nibble of khi[offset>>1]
 * offset - index of the group of four subkeys being generated
 * nmode, xmode - index remapping selectors, used via the NMT/XMT macros
 *
 * Bits are only ORed in, so the caller has to clear klo/khi beforehand.
 * The local names src, nmode, xmode, nmt and xmt are referenced by the
 * NMT/XMT macros and must keep these names.
 */
static
void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
	/* nmode table */
	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC,
	                 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4};
	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC},
	                     {0x8, 0xD, 0x3, 0x7},
	                     {0x3, 0x7, 0x8, 0xD},
	                     {0x9, 0xC, 0x2, 0x6}};
	uint8_t *pair = &klo[offset*2];
	uint8_t top = 0;  /* collects bit 4 of the four subkeys, first one ends in bit 0 */
	uint8_t v;

	v = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
	pair[0] |= (uint8_t)(v & 0x0f);
	top = (uint8_t)((top | (v & 0x10)) >> 1);

	v = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
	pair[0] |= (uint8_t)((v & 0x0f) << 4);
	top = (uint8_t)((top | (v & 0x10)) >> 1);

	v = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
	pair[1] |= (uint8_t)(v & 0x0f);
	top = (uint8_t)((top | (v & 0x10)) >> 1);

	v = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
	pair[1] |= (uint8_t)((v & 0x0f) << 4);
	top = (uint8_t)((top | (v & 0x10)) >> 1);

	#ifdef DEBUG
		uart_putstr("\r\n\t h="); uart_hexdump(&top,1);
	#endif

	if (offset & 0x1) {
		khi[offset>>1] |= (uint8_t)(top << 4);
	} else {
		khi[offset>>1] |= top;
	}
}
119
/* S-box reads indexed by byte s of the x buffer (BPX must be defined at the point of use). */
#define S_5X(s) pgm_read_dword(s5 + BPX[(s)])
#define S_6X(s) pgm_read_dword(s6 + BPX[(s)])
#define S_7X(s) pgm_read_dword(s7 + BPX[(s)])
#define S_8X(s) pgm_read_dword(s8 + BPX[(s)])

/* S-box reads indexed by byte s of the z buffer (BPZ must be defined at the point of use). */
#define S_5Z(s) pgm_read_dword(s5 + BPZ[(s)])
#define S_6Z(s) pgm_read_dword(s6 + BPZ[(s)])
#define S_7Z(s) pgm_read_dword(s7 + BPZ[(s)])
#define S_8Z(s) pgm_read_dword(s8 + BPZ[(s)])
129
130
131
132
133 void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s){
134          /* we migth return if the key is valid and if setup was sucessfull */
135         uint32_t x[4], z[4];
136         #define BPX ((uint8_t*)&(x[0]))
137         #define BPZ ((uint8_t*)&(z[0]))
138         s->shortkey = (keylength_b<=80);
139         /* littel endian only! */
140         memset(&(x[0]), 0 ,16); /* set x to zero */
141         if(keylength_b > 128)
142                 keylength_b=128;
143         memcpy(&(x[0]), key, (keylength_b+7)/8);
144         
145
146         /* todo: merge a and b and compress the whole stuff */
147         /***** A *****/
148         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);      
149         /***** M *****/
150         cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
151         /***** B *****/
152         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
153         /***** N *****/
154         cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
155         /***** A *****/
156         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
157         /***** N' *****/
158         cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
159         /***** B *****/
160         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
161         /***** M' *****/
162         cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
163         
164         /* that were the masking keys, now the rotation keys */
165         /* set the keys to zero */
166         memset(&(s->rotl[0]),0,8);
167         s->roth[0]=s->roth[1]=0;
168         /***** A *****/
169         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
170         /***** M *****/
171         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
172         /***** B *****/
173         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
174         /***** N *****/
175         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
176         /***** A *****/
177         cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
178         /***** N' *****/
179         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
180         /***** B *****/
181         cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
182         /***** M' *****/
183         cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
184         /* done ;-) */
185 }
186
187
188
189 /********************************************************************************************************/
190
/*
 * Rotate the 32-bit unsigned value a left by n bits.
 * Both shift counts are reduced modulo 32, so n == 0 (a valid rotation
 * amount) yields a unchanged instead of shifting by the full width of the
 * type, which C leaves undefined. Arguments are evaluated more than once.
 */
#define ROTL32(a,n) ((a)<<((n)&31) | (a)>>((32-(n))&31))

/* Reverse the byte order of the 32-bit unsigned value x (x is evaluated four times). */
#define CHANGE_ENDIAN32(x) ((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8 )

/* Common signature of the round functions: (data, masking key, rotation amount). */
typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);

/* In-memory byte positions of a 32-bit word used as S-box indices for s1..s4. */
#define IA 3
#define IB 2
#define IC 1
#define ID 0
200
201 static
202 uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
203         uint32_t t;
204         t = ROTL32((d + m),r);
205 #ifdef DEBUG
206         uint32_t ia,ib,ic,id;
207         uart_putstr("\r\n f1("); uart_hexdump(&d, 4); uart_putc(',');
208                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
209                 uart_hexdump(&t, 4);
210         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
211         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
212         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
213         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
214         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
215         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
216         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
217         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
218
219         return (((ia ^ ib) - ic) + id);
220
221 #else
222         
223         return (((  pgm_read_dword(&s1[((uint8_t*)&t)[IA]]) 
224                   ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) ) 
225                   - pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) ) 
226                   + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
227
228 #endif
229 }
230
231 static
232 uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
233         uint32_t t;
234         t = ROTL32((d ^ m),r);
235 #ifdef DEBUG
236         uint32_t ia,ib,ic,id;
237         uart_putstr("\r\n f2("); uart_hexdump(&d, 4); uart_putc(',');
238                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
239                 uart_hexdump(&t, 4);
240
241         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
242         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
243         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
244         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
245         
246         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
247         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
248         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
249         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
250
251         return (((ia - ib) + ic) ^ id);
252 #else
253         
254         return (((    pgm_read_dword(&s1[((uint8_t*)&t)[IA]]) 
255                     - pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) ) 
256                     + pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) ) 
257                     ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
258
259 #endif
260 }
261
262 static
263 uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
264         uint32_t t;
265         t = ROTL32((m - d),r);
266
267 #ifdef DEBUG
268         uint32_t ia,ib,ic,id;
269
270         uart_putstr("\r\n f3("); uart_hexdump(&d, 4); uart_putc(',');
271                 uart_hexdump(&m , 4); uart_putc(','); uart_hexdump(&r, 1);uart_putstr("): I=");
272                 uart_hexdump(&t, 4);
273
274         ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
275         ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
276         ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
277         id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
278         
279         uart_putstr("\r\n\tIA="); uart_hexdump(&ia, 4);
280         uart_putstr("\r\n\tIB="); uart_hexdump(&ib, 4);
281         uart_putstr("\r\n\tIC="); uart_hexdump(&ic, 4);
282         uart_putstr("\r\n\tID="); uart_hexdump(&id, 4);
283         return (((ia + ib) ^ ic) - id);
284 #else
285         return ((  pgm_read_dword(&s1[((uint8_t*)&t)[IA]] )
286                  + pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
287                  ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) 
288                  - pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
289
290 #endif
291 }
292
293 /******************************************************************************/
294
295 void cast5_enc(void* block, const cast5_ctx_t *s){
296         uint32_t l,r, x, y;
297         uint8_t i;
298         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
299         l=((uint32_t*)block)[0];
300         r=((uint32_t*)block)[1];
301 //      uart_putstr("\r\n round[-1] = ");
302 //      uart_hexdump(&r, 4);
303         for (i=0;i<(s->shortkey?12:16);++i){
304                 x = r;
305                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
306                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
307                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
308                 r = l ^ CHANGE_ENDIAN32(y);
309 //              uart_putstr("\r\n round["); DEBUG_B(i); uart_putstr("] = ");
310 //              uart_hexdump(&r, 4);
311                 l = x;
312         }
313         ((uint32_t*)block)[0]=r;
314         ((uint32_t*)block)[1]=l;
315 }
316
317 /******************************************************************************/
318
319 void cast5_dec(void* block, const cast5_ctx_t *s){
320         uint32_t l,r, x, y;
321         int8_t i, rounds;
322         cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3};
323         l=((uint32_t*)block)[0];
324         r=((uint32_t*)block)[1];
325         rounds = (s->shortkey?12:16);
326         for (i=rounds-1; i>=0 ;--i){
327                 x = r;
328                 y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), 
329                         (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) 
330                          + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) );
331                 r = l ^ CHANGE_ENDIAN32(y);
332                 l = x;
333         }
334         ((uint32_t*)block)[0]=r;
335         ((uint32_t*)block)[1]=l;
336 }
337
338
339 /******************************************************************************/
340
341
342
343