10 #include <avr/pgmspace.h>
14 #include <util/delay.h>
16 /*****************************************************************************/
/*
 * Rotate the 8-bit value a left by n bit positions.
 *
 * The count is reduced modulo 8, so any n is accepted (n == 0 and n == 8
 * both return a unchanged). The shifts are carried out on an unsigned
 * copy so that the intermediate result cannot overflow a 16-bit signed int.
 */
uint8_t rol(uint8_t a, uint8_t n)
{
	const unsigned int wide = a;

	n &= 7u;
	return (uint8_t)((wide << n) | (wide >> (8u - n)));
}
20 /*****************************************************************************/
/*
 * Rotate the 8-bit value a right by n bit positions.
 *
 * The count is reduced modulo 8, so any n is accepted (n == 0 and n == 8
 * both return a unchanged). The shifts are carried out on an unsigned
 * copy so that the intermediate result cannot overflow a 16-bit signed int.
 */
uint8_t ror(uint8_t a, uint8_t n)
{
	const unsigned int wide = a;

	n &= 7u;
	return (uint8_t)((wide << (8u - n)) | (wide >> n));
}
24 /*****************************************************************************/
/*
 * Rotate the 32-bit value a left by n bit positions.
 *
 * The count is reduced modulo 32. Both shift amounts therefore stay in
 * the range 0..31, which avoids the undefined shift-by-32 that a plain
 * (a >> (32 - n)) would perform for n == 0.
 */
uint32_t rol32(uint32_t a, uint8_t n)
{
	n &= 31u;
	return (a << n) | (a >> ((32u - n) & 31u));
}
30 /*****************************************************************************/
/*
 * Rotate the 64-bit value a left by n bit positions.
 *
 * The count is reduced modulo 64. Both shift amounts therefore stay in
 * the range 0..63, which avoids the undefined shift-by-64 that a plain
 * (a >> (64 - n)) would perform for n == 0.
 */
uint64_t rol64(uint64_t a, uint8_t n)
{
	n &= 63u;
	return (a << n) | (a >> ((64u - n) & 63u));
}
36 /*****************************************************************************/
/*
 * 256-entry byte substitution table placed in program memory (PROGMEM).
 * It is read with pgm_read_byte_near() by camellia_s1() .. camellia_s4();
 * going by its name it is the Camellia s1 S-box.
 * Sixteen rows of sixteen values are listed below.
 *
 * NOTE(review): the closing of the initialiser is not visible in this
 * listing. Also, recent avr-gcc releases are reported to require PROGMEM
 * objects to be const -- confirm against the toolchain in use.
 */
38 uint8_t camellia_s1_table[256] PROGMEM = {
39 112, 130, 44, 236, 179, 39, 192, 229, 228, 133, 87, 53, 234, 12, 174, 65,
40 35, 239, 107, 147, 69, 25, 165, 33, 237, 14, 79, 78, 29, 101, 146, 189,
41 134, 184, 175, 143, 124, 235, 31, 206, 62, 48, 220, 95, 94, 197, 11, 26,
42 166, 225, 57, 202, 213, 71, 93, 61, 217, 1, 90, 214, 81, 86, 108, 77,
43 139, 13, 154, 102, 251, 204, 176, 45, 116, 18, 43, 32, 240, 177, 132, 153,
44 223, 76, 203, 194, 52, 126, 118, 5, 109, 183, 169, 49, 209, 23, 4, 215,
45 20, 88, 58, 97, 222, 27, 17, 28, 50, 15, 156, 22, 83, 24, 242, 34,
46 254, 68, 207, 178, 195, 181, 122, 145, 36, 8, 232, 168, 96, 252, 105, 80,
47 170, 208, 160, 125, 161, 137, 98, 151, 84, 91, 30, 149, 224, 255, 100, 210,
48 16, 196, 0, 72, 163, 247, 117, 219, 138, 3, 230, 218, 9, 63, 221, 148,
49 135, 92, 131, 2, 205, 74, 144, 51, 115, 103, 246, 243, 157, 127, 191, 226,
50 82, 155, 216, 38, 200, 55, 198, 59, 129, 150, 111, 75, 19, 190, 99, 46,
51 233, 121, 167, 140, 159, 110, 188, 142, 41, 245, 249, 182, 47, 253, 180, 89,
52 120, 152, 6, 106, 231, 70, 113, 186, 212, 37, 171, 66, 136, 162, 141, 250,
53 114, 7, 185, 85, 248, 238, 172, 10, 54, 73, 42, 104, 60, 56, 241, 164,
54 64, 40, 211, 123, 187, 201, 67, 193, 21, 227, 173, 244, 119, 199, 128, 158
57 /*****************************************************************************/
/* Return the camellia_s1_table entry for b, read from program memory. */
59 uint8_t camellia_s1(uint8_t b){
60 return pgm_read_byte_near(&(camellia_s1_table[b]));
63 /*****************************************************************************/
/*
 * Return the camellia_s1_table entry for b, rotated left by one bit
 * (table lookup first, rotation of the result second).
 */
65 uint8_t camellia_s2(uint8_t b){
66 return rol(pgm_read_byte_near(&(camellia_s1_table[b])),1);
69 /*****************************************************************************/
/*
 * Return the camellia_s1_table entry for b, rotated right by one bit
 * (table lookup first, rotation of the result second).
 */
71 uint8_t camellia_s3(uint8_t b){
72 return ror(pgm_read_byte_near(&(camellia_s1_table[b])),1);
75 /*****************************************************************************/
/*
 * Return the camellia_s1_table entry at index rol(b, 1): here the input
 * byte is rotated left by one bit before the lookup, and the table value
 * is returned unmodified.
 */
77 uint8_t camellia_s4(uint8_t b){
78 return pgm_read_byte_near(&(camellia_s1_table[rol(b,1)]));
81 /*****************************************************************************/
/*
 * Substitute each of the eight bytes of d in place.
 *
 * D views d as an array of bytes. The substitution applied per index is:
 *   D[7] -> s1, D[6] -> s2, D[5] -> s3, D[4] -> s4,
 *   D[3] -> s2, D[2] -> s3, D[1] -> s4, D[0] -> s1.
 * Because D indexes the object representation of d, which byte is the
 * most significant depends on host byte order (presumably little-endian,
 * making D[7] the top byte -- confirm).
 *
 * NOTE(review): no return statement is visible in this listing;
 * presumably the modified d is returned.
 */
83 uint64_t camellia_s(uint64_t d){
84 // uart_putstr("\n\r S von "); uart_hexdump(&(d), 8);
85 #define D ((uint8_t*)(&d))
86 D[7] = camellia_s1(D[7]);
87 D[6] = camellia_s2(D[6]);
88 D[5] = camellia_s3(D[5]);
89 D[4] = camellia_s4(D[4]);
91 D[3] = camellia_s2(D[3]);
92 D[2] = camellia_s3(D[2]);
93 D[1] = camellia_s4(D[1]);
94 D[0] = camellia_s1(D[0]);
96 // uart_putstr(" ist "); uart_hexdump(&(d), 8);
100 /*****************************************************************************/
/*
 * Byte-wise XOR mixing of d into z.
 *
 * D and Z view d and z as arrays of eight bytes. Two groups of
 * assignments to Z[0..7] appear below. D is never written, and the second
 * group assigns every Z[i] again, so as listed only the second group
 * determines the result: each output byte is the XOR of five or six
 * input bytes.
 *
 * NOTE(review): the declaration of z and the return statement are not
 * visible in this listing. The first (three-term) group is presumably
 * disabled by comment or preprocessor lines that are also not visible --
 * confirm against the full file.
 */
102 uint64_t camellia_p(uint64_t d){
104 #define D ((uint8_t*)(&d))
105 #define Z ((uint8_t*)(&z))
107 Z[0] = D[4] ^ D[3] ^ D[1];
108 Z[1] = D[5] ^ D[0] ^ D[2];
109 Z[2] = D[6] ^ D[1] ^ D[3];
110 Z[3] = D[7] ^ D[2] ^ D[0];
111 Z[4] = D[0] ^ D[6] ^ D[5];
112 Z[5] = D[1] ^ D[7] ^ D[6];
113 Z[6] = D[2] ^ D[4] ^ D[7];
114 Z[7] = D[3] ^ D[5] ^ D[4];
116 // Z[7] = z1 z3 z4 z6 z7 z8
117 // uart_putstr("\n\r P von "); uart_hexdump(&(d), 8);
119 Z[7] = D[7] ^ D[5] ^ D[4] ^ D[2] ^ D[1] ^ D[0];
120 Z[6] = D[7] ^ D[6] ^ D[4] ^ D[3] ^ D[1] ^ D[0];
121 Z[5] = D[7] ^ D[6] ^ D[5] ^ D[3] ^ D[2] ^ D[0];
122 Z[4] = D[6] ^ D[5] ^ D[4] ^ D[3] ^ D[2] ^ D[1] ;
123 Z[3] = D[7] ^ D[6] ^ D[2] ^ D[1] ^ D[0];
124 Z[2] = D[6] ^ D[5] ^ D[3] ^ D[1] ^ D[0];
125 Z[1] = D[5] ^ D[4] ^ D[3] ^ D[2] ^ D[0];
126 Z[0] = D[7] ^ D[4] ^ D[3] ^ D[2] ^ D[1] ;
128 // uart_putstr(" ist "); uart_hexdump(&(z), 8);
135 /*****************************************************************************/
/*
 * Round function: y = camellia_p(camellia_s(x ^ k)).
 *
 * x is the 64-bit input and k the 64-bit round key. After computing y the
 * visible code writes x, k and y to the UART as 8-byte hex dumps.
 *
 * NOTE(review): the declaration of y, the return statement, and any
 * conditional-compilation guard around the UART calls are not visible in
 * this listing; presumably the output is debug-only and y is returned.
 */
137 uint64_t camellia_f(uint64_t x, uint64_t k){
139 y = camellia_p(camellia_s(x ^ k));
141 uart_putstr("\r\nEfunc X=");
142 uart_hexdump(&(x), 8);
144 uart_hexdump(&(k), 8);
146 uart_hexdump(&(y), 8);
151 /*****************************************************************************/
/*
 * FL layer on a 64-bit value, computed on 32-bit halves.
 *
 * X, K and Y view the local buffers lx, lk and y as pairs of uint32_t
 * (index 1 and index 0). The visible computation is:
 *   Y[0] = rol32(X[1] & K[1], 1) ^ X[0]
 *   Y[1] = (Y[0] | K[0]) ^ X[1]
 * followed by UART output of x and k.
 *
 * NOTE(review): the statements copying x and k into lx and lk, the return
 * statement, and any guard around the UART calls are not visible in this
 * listing. The macros cast away the volatile qualifier of the buffers.
 */
153 uint64_t camellia_fl(uint64_t x, uint64_t k){
154 volatile uint64_t lx[1], lk[1], y[1];
157 #define Y ((uint32_t*)y)
158 #define X ((uint32_t*)lx)
159 #define K ((uint32_t*)lk)
161 Y[0] = rol32((X[1]) & K[1],1) ^ (X[0]); /* Yr */
162 Y[1] = (Y[0] | K[0]) ^ (X[1]); /* Yl */
165 uart_putstr("\r\nFL(");
166 uart_hexdump(&(x), 8);
168 uart_hexdump(&(k), 8);
178 /*****************************************************************************/
/*
 * Inverse FL layer on a 64-bit value, computed on 32-bit halves.
 *
 * Y, K and X view the local buffers ly, lk and x as pairs of uint32_t.
 * The visible computation is:
 *   X[1] = (Y[0] | K[0]) ^ Y[1]
 *   X[0] = rol32(X[1] & K[1], 1) ^ Y[0]
 * followed by UART output of y and k.
 *
 * NOTE(review): the statements copying y and k into ly and lk, the return
 * statement, and any guard around the UART calls are not visible in this
 * listing. Y, X and K are defined here with different replacement lists
 * than in camellia_fl; presumably an #undef precedes them -- confirm.
 */
180 uint64_t camellia_fl_inv(uint64_t y, uint64_t k){
181 //volatile uint32_t xl, xr;
182 volatile uint64_t ly[1], lk[1], x[1];
184 #define Y ((uint32_t*)ly)
185 #define X ((uint32_t*)x)
186 #define K ((uint32_t*)lk)
188 X[1]=(Y[0] | K[0]) ^ Y[1];
189 X[0]=rol32((X[1] & K[1]),1) ^ Y[0];
192 uart_putstr("\r\nFL_inv(");
193 uart_hexdump(&(y), 8);
195 uart_hexdump(&(k), 8);
204 /*****************************************************************************/
/*
 * 64-bit constants used as round keys by camellia128_init(), which reads
 * elements 0 through 3.
 *
 * NOTE(review): the array is declared with six elements but only five
 * initialisers and no closing of the initialiser list are visible in this
 * listing -- confirm the sixth value against the full file.
 */
206 uint64_t camellia_sigma[6]={
207 0xA09E667F3BCC908BLL,
208 0xB67AE8584CAA73B2LL,
209 0xC6EF372FE94F82BELL,
210 0x54FF53A5F1D36F1CLL,
211 0x10E527FADE682D1DLL,
215 /*****************************************************************************/
/*
 * Debug helper: write the key context *s to the UART.
 * Prints a header line followed by the four 64-bit fields kal, kar, kll
 * and klr, each as an 8-byte hex dump in the order they appear below.
 */
217 void camellia128_ctx_dump(camellia128_ctx_t *s){
218 uart_putstr("\r\n==State Dump==");
219 uart_putstr("\n\rKAl: "); uart_hexdump(&(s->kal), 8);
220 uart_putstr("\n\rKAr: "); uart_hexdump(&(s->kar), 8);
221 uart_putstr("\n\rKLl: "); uart_hexdump(&(s->kll), 8);
222 uart_putstr("\n\rKLr: "); uart_hexdump(&(s->klr), 8);
226 /*****************************************************************************/
/*
 * Set up the key context *s from key.
 *
 * Visible steps: kll is cleared first; per the existing comment the key is
 * then loaded, endian-adjusted, into kll and klr; finally kar and kal are
 * updated alternately with camellia_f() using camellia_sigma[0] through
 * camellia_sigma[3] as round keys.
 *
 * NOTE(review): the statements that actually load the key, and whatever
 * sits between the sigma[1] and sigma[2] steps, are not visible in this
 * listing.
 */
228 void camellia128_init(camellia128_ctx_t* s, uint8_t* key){
230 s->kll = 0; //((uint64_t*)key)[0];
232 /* load the key, endian-adjusted, to kll,klr */
245 s->kar ^= camellia_f(s->kal, camellia_sigma[0]);
246 s->kal ^= camellia_f(s->kar, camellia_sigma[1]);
251 s->kar ^= camellia_f(s->kal, camellia_sigma[2]);
252 s->kal ^= camellia_f(s->kar, camellia_sigma[3]);
254 // uart_putstr("\n\r----------------init finished--------------------");
257 /*****************************************************************************/
/*
 * Advance the key schedule: rotate each 128-bit pair (kal:kar) and
 * (kll:klr) left by 16 + q bits.
 *
 * For each pair, temp saves the top 16 + q bits of the left half; the left
 * half is shifted up and refilled from the top of the right half, and the
 * right half is shifted up and refilled from temp. The callers visible in
 * this file pass q = 1 or q = -1 (a rotation by 17 or 15 bits).
 *
 * NOTE(review): the declaration of temp is not visible in this listing.
 */
259 void camellia128_keyop(camellia128_ctx_t* s, int8_t q){
260 /* first we do 16 bit left-rols for kl and ka (128bit each) */
263 temp = (s->kal)>>(64-16-q);
264 s->kal = s->kal<<(16+q) | s->kar>>(64-16-q);
265 s->kar = s->kar<<(16+q) | temp;
267 temp = (s->kll)>>(64-16-q);
268 s->kll = s->kll<<(16+q) | s->klr>>(64-16-q);
269 s->klr = s->klr<<(16+q) | temp;
270 /* after doing the 16-bit rol we have to rol 1 bit left or right depending on q */
273 /*****************************************************************************/
/*
 * Step the key schedule backwards: rotate each 128-bit pair (kal:kar) and
 * (kll:klr) right by 16 + q bits.
 *
 * For each pair, temp saves the low 16 + q bits of the right half (the
 * mask 0xffffff >> (24-16-q) keeps exactly that many bits); the right half
 * is shifted down and refilled from the left half, and the left half is
 * shifted down and refilled from temp. The callers visible in this file
 * pass q = 1 or q = -1 (a rotation by 17 or 15 bits).
 *
 * NOTE(review): the declaration of temp is not visible in this listing.
 */
275 void camellia128_keyop_inv(camellia128_ctx_t* s, int8_t q){
276 /* first we do 16 bit right-rols for kl and ka (128bit each) */
279 temp = (s->kar)&(0xffffff>>(24-16-q));
280 s->kar = s->kar>>(16+q) | s->kal<<(64-16-q);
281 s->kal = s->kal>>(16+q) | ((uint64_t)temp)<<(64-16-q);
283 temp = (s->klr)&(0xffffff>>(24-16-q));
284 s->klr = s->klr>>(16+q) | s->kll<<(64-16-q);
285 s->kll = s->kll>>(16+q) | ((uint64_t)temp)<<(64-16-q);
286 /* after doing the 16-bit rol we have to rol 1 bit left or right depending on q */
289 /*****************************************************************************/
/*
 * Bit flags for the roundop argument of camellia_6rounds().
 *
 * Bit 0 (KEY_POSTC1 / KEY_POSTC2): when KEY_INC2 is clear, the key
 *   operation runs after the cycle whose index equals this bit, i.e.
 *   after cycle 0 for KEY_POSTC1 and after cycle 1 for KEY_POSTC2.
 * Bit 1 (KEY_INC2): the key operation runs in the middle of cycle 1
 *   instead, between its two camellia_f() steps.
 * Bit 2 (KEY_DIR_NORM / KEY_DIR_INV): direction values.
 * Bit 3 (KEY_AMMOUNT mask; KEY_ROL17 / KEY_ROL15): when set the key
 *   operation is called with q = 1, otherwise with q = -1.
 *
 * NOTE(review): camellia_6rounds() tests a KEY_DIR mask whose definition
 * is not visible in this listing; presumably it is 0x04 -- confirm.
 */
294 #define KEY_POSTC1 0x00
295 #define KEY_POSTC2 0x01
296 #define KEY_INC2 0x02
299 #define KEY_DIR_NORM 0x00
300 #define KEY_DIR_INV 0x04
302 #define KEY_AMMOUNT 0x08
303 #define KEY_ROL17 0x08
304 #define KEY_ROL15 0x00
/*
 * Run three cycles of two Feistel steps each on the block halves bl[0]
 * and br[0], optionally advancing the key schedule part-way through.
 *
 * Per cycle, as visible here:
 *   1. br[0] ^= camellia_f(bl[0], key selected through k)
 *   2. if this is cycle 1 and KEY_INC2 is set in roundop, call the key
 *      operation between the two steps
 *   3. bl[0] ^= camellia_f(br[0], key selected through k)
 *   4. if KEY_INC2 is clear and the cycle index equals (roundop & 1),
 *      call the key operation after the cycle
 *
 * The key operation is camellia128_keyop_inv when (roundop & KEY_DIR) is
 * non-zero and camellia128_keyop otherwise; its q argument is 1 when
 * KEY_AMMOUNT is set in roundop and -1 when it is clear.
 *
 * The index into k is (keychoice & 1) * 2 plus 0 or 1; the direction bit
 * swaps which element of the pair is used in step 1 and in step 3.
 *
 * NOTE(review): the declarations of i and k, and any update of keychoice
 * between steps, are not visible in this listing. Presumably k is an
 * array of pointers to the four 64-bit key halves of s -- confirm.
 */
306 void camellia_6rounds(camellia128_ctx_t* s, uint64_t* bl, uint64_t* br, uint8_t roundop, uint8_t keychoice){
313 for(i=0; i<3; ++i){ /* each cycle */
314 br[0] ^= camellia_f(bl[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?1:0)]));
317 if((i == 1) && (roundop&KEY_INC2)){
318 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
321 bl[0] ^= camellia_f(br[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?0:1)]));
324 /* check if we should do some keyop */
325 if((i == (roundop&1)) && (!(roundop&KEY_INC2)) ){
326 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
327 /* the conditional expression above selects which key-schedule function is called */
332 /*****************************************************************************/
/*
 * Encrypt one 16-byte block in place using key context s.
 *
 * BL and BR address the block as two uint64_t values (element 0 and
 * element 1). The visible code:
 *   - reverses the byte order of temp64 into BL and into BR at the start
 *     and again at the end ("endian adjustment");
 *   - applies Feistel steps of the form BR ^= camellia_f(BL, key) and
 *     BL ^= camellia_f(BR, key) with the key halves kal, kar, kll, klr;
 *   - advances the key schedule with camellia128_keyop() between steps;
 *   - applies camellia_fl() to BL and camellia_fl_inv() to BR twice,
 *     each time bracketed by camellia128_keyop() calls.
 * The key schedule in s is modified as a side effect.
 *
 * NOTE(review): this listing shows hand-unrolled round sequences as well
 * as camellia_6rounds() calls for the same stages, and two comment
 * openers without a visible terminator. Presumably only one variant is
 * active in the full file. The declaration and loading of temp64 and any
 * whitening steps are not visible here -- confirm against the full file.
 */
335 void camellia128_enc(camellia128_ctx_t* s, void* block){
337 #define BL (((uint64_t*)block)[0])
338 #define BR (((uint64_t*)block)[1])
339 /* endian adjustment */
348 BL = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
349 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
350 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
351 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
353 BR = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
354 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
355 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
356 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
364 BR ^= camellia_f(BL, s->kal);
365 BL ^= camellia_f(BR, s->kar);
366 camellia128_keyop(s, -1);
367 BR ^= camellia_f(BL, s->kll);
368 BL ^= camellia_f(BR, s->klr);
369 BR ^= camellia_f(BL, s->kal);
370 BL ^= camellia_f(BR, s->kar);
372 camellia_6rounds(s, &BL, &BR, KEY_ROL15 | KEY_DIR_NORM | KEY_POSTC1 , 0x33);
374 camellia128_keyop(s, -1);
375 BL = camellia_fl(BL, s->kal);
376 BR = camellia_fl_inv(BR, s->kar);
377 camellia128_keyop(s, -1);
379 /* BR ^= camellia_f(BL, s->kll);
380 BL ^= camellia_f(BR, s->klr);
381 BR ^= camellia_f(BL, s->kal);
382 camellia128_keyop(s, -1);
383 BL ^= camellia_f(BR, s->klr);
384 BR ^= camellia_f(BL, s->kal);
385 BL ^= camellia_f(BR, s->kar);
387 camellia_6rounds(s, &BL, &BR, KEY_ROL15 | KEY_DIR_NORM | KEY_INC2 , 0x34);
389 camellia128_keyop(s, 1);
390 BL = camellia_fl(BL, s->kll);
391 BR = camellia_fl_inv(BR, s->klr);
392 camellia128_keyop(s, 1);
394 /* BR ^= camellia_f(BL, s->kll);
395 BL ^= camellia_f(BR, s->klr);
396 BR ^= camellia_f(BL, s->kal);
397 BL ^= camellia_f(BR, s->kar);
398 camellia128_keyop(s, 1);
400 BR ^= camellia_f(BL, s->kll);
401 BL ^= camellia_f(BR, s->klr);
403 camellia_6rounds(s, &BL, &BR, KEY_ROL17 | KEY_DIR_NORM | KEY_POSTC2 , 0x0C);
413 BL = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
414 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
415 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
416 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
418 BR = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
419 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
420 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
421 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
427 /*****************************************************************************/
/*
 * Decrypt one 16-byte block in place using key context s.
 *
 * BL and BR address the block as two uint64_t values, with the element
 * indices swapped relative to camellia128_enc (BL is element 1, BR is
 * element 0). The visible code:
 *   - reverses the byte order of temp64 into BL and into BR at the start
 *     and again at the end ("endian adjustment");
 *   - XORs BR with kal and BL with kar near the start, and BL with kll
 *     and BR with klr near the end (labelled kw3, kw4, kw1, kw2);
 *   - applies Feistel steps of the form BL ^= camellia_f(BR, key) and
 *     BR ^= camellia_f(BL, key) with the key halves kar, kal, klr, kll;
 *   - steps the key schedule with camellia128_keyop_inv() between steps;
 *   - applies camellia_fl() to BR and camellia_fl_inv() to BL twice,
 *     each time bracketed by camellia128_keyop_inv() calls.
 * The key schedule in s is modified as a side effect.
 *
 * NOTE(review): this listing shows hand-unrolled round sequences as well
 * as camellia_6rounds() calls for the same stages, and several comment
 * openers without a visible terminator. Presumably only one variant is
 * active in the full file. The declaration and loading of temp64 are not
 * visible here -- confirm against the full file.
 */
429 void camellia128_dec(camellia128_ctx_t* s, void* block){
431 #define BL (((uint64_t*)block)[1])
432 #define BR (((uint64_t*)block)[0])
433 /* endian adjustment */
443 BL = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
444 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
445 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
446 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
448 BR = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
449 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
450 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
451 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
453 camellia128_keyop_inv(s, 1);
455 BR ^= s->kal; /* kw3 */
456 BL ^= s->kar; /* kw4 */
460 BL ^= camellia_f(BR, s->klr); /* k18 * /
461 BR ^= camellia_f(BL, s->kll); /* k17 * /
462 camellia128_keyop_inv(s, 1);
463 BL ^= camellia_f(BR, s->kar);
464 BR ^= camellia_f(BL, s->kal);
465 BL ^= camellia_f(BR, s->klr);
466 BR ^= camellia_f(BL, s->kll);
468 camellia_6rounds(s, &BR, &BL, KEY_ROL17 | KEY_DIR_INV | KEY_POSTC1 , 0x0C);
471 camellia128_keyop_inv(s, 1);
472 BR = camellia_fl(BR, s->klr);
473 BL = camellia_fl_inv(BL, s->kll);
474 camellia128_keyop_inv(s, 1);
476 /* BL ^= camellia_f(BR, s->kar);
477 BR ^= camellia_f(BL, s->kal);
478 BL ^= camellia_f(BR, s->klr);
479 camellia128_keyop_inv(s, -1);
480 BR ^= camellia_f(BL, s->kal);
481 BL ^= camellia_f(BR, s->klr);
482 BR ^= camellia_f(BL, s->kll);
484 camellia_6rounds(s, &BR, &BL, KEY_ROL15 | KEY_DIR_INV | KEY_INC2 , 0x0B);
487 camellia128_keyop_inv(s, -1);
488 BR = camellia_fl(BR, s->kar);
489 BL = camellia_fl_inv(BL, s->kal);
490 camellia128_keyop_inv(s, -1);
493 BL ^= camellia_f(BR, s->kar);
494 BR ^= camellia_f(BL, s->kal);
495 BL ^= camellia_f(BR, s->klr);
496 BR ^= camellia_f(BL, s->kll);
497 camellia128_keyop_inv(s, -1);
498 BL ^= camellia_f(BR, s->kar);
499 BR ^= camellia_f(BL, s->kal);
501 camellia_6rounds(s, &BR, &BL, KEY_ROL15 | KEY_DIR_INV | KEY_POSTC2 , 0x33);
504 BL ^= s->kll; /* kw1 */
505 BR ^= s->klr; /* kw2 */
512 BL = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
513 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
514 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
515 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
517 BR = temp64 >> 56 | temp64 << 56 /* swap the most out bytes (1 & 8) */
518 | (temp64 & (0xffLL<<48))>>(5*8) | (temp64 & (0xffLL<< 8))<<(5*8) /* 2 & 7 */
519 | (temp64 & (0xffLL<<40))>>(3*8) | (temp64 & (0xffLL<<16))<<(3*8) /* 3 & 6 */
520 | (temp64 & (0xffLL<<32))>>(1*8) | (temp64 & (0xffLL<<24))<<(1*8); /* 4 & 5 */
524 /*****************************************************************************/
525 /*****************************************************************************/