6 * Description: Implementation of the Camellia block cipher algorithm.
17 /* push r18 - r27, r30 - r31*/
82 /* X points to Block */
83 .macro dbg_hexdump length
96 .byte 112, 130, 44, 236, 179, 39, 192, 229, 228, 133, 87, 53, 234, 12, 174, 65
97 .byte 35, 239, 107, 147, 69, 25, 165, 33, 237, 14, 79, 78, 29, 101, 146, 189
98 .byte 134, 184, 175, 143, 124, 235, 31, 206, 62, 48, 220, 95, 94, 197, 11, 26
99 .byte 166, 225, 57, 202, 213, 71, 93, 61, 217, 1, 90, 214, 81, 86, 108, 77
100 .byte 139, 13, 154, 102, 251, 204, 176, 45, 116, 18, 43, 32, 240, 177, 132, 153
101 .byte 223, 76, 203, 194, 52, 126, 118, 5, 109, 183, 169, 49, 209, 23, 4, 215
102 .byte 20, 88, 58, 97, 222, 27, 17, 28, 50, 15, 156, 22, 83, 24, 242, 34
103 .byte 254, 68, 207, 178, 195, 181, 122, 145, 36, 8, 232, 168, 96, 252, 105, 80
104 .byte 170, 208, 160, 125, 161, 137, 98, 151, 84, 91, 30, 149, 224, 255, 100, 210
105 .byte 16, 196, 0, 72, 163, 247, 117, 219, 138, 3, 230, 218, 9, 63, 221, 148
106 .byte 135, 92, 131, 2, 205, 74, 144, 51, 115, 103, 246, 243, 157, 127, 191, 226
107 .byte 82, 155, 216, 38, 200, 55, 198, 59, 129, 150, 111, 75, 19, 190, 99, 46
108 .byte 233, 121, 167, 140, 159, 110, 188, 142, 41, 245, 249, 182, 47, 253, 180, 89
109 .byte 120, 152, 6, 106, 231, 70, 113, 186, 212, 37, 171, 66, 136, 162, 141, 250
110 .byte 114, 7, 185, 85, 248, 238, 172, 10, 54, 73, 42, 104, 60, 56, 241, 164
111 .byte 64, 40, 211, 123, 187, 201, 67, 193, 21, 227, 173, 244, 119, 199, 128, 158
113 //.global camellia_sigma
116 .quad 0xA09E667F3BCC908B
117 .quad 0xB67AE8584CAA73B2
118 .quad 0xC6EF372FE94F82BE
119 .quad 0x54FF53A5F1D36F1C
120 .quad 0x10E527FADE682D1D
121 .quad 0xB05688C2B3E6C1FD
126 /* uint8_t camellia_s1(uint8_t b) */
129 ldi r30, lo8(camellia_sbox)
130 ldi r31, hi8(camellia_sbox)
139 ldi r30, lo8(camellia_sbox)
140 ldi r31, hi8(camellia_sbox)
151 ldi r30, lo8(camellia_sbox)
152 ldi r31, hi8(camellia_sbox)
164 ldi r30, lo8(camellia_sbox)
165 ldi r31, hi8(camellia_sbox)
175 /* uint64_t camellia_s(uint64_t d){
176 #define D ((uint8_t*)(&d))
177 D[7] = camellia_s1(D[7]); // MSB
178 D[6] = camellia_s2(D[6]);
179 D[5] = camellia_s3(D[5]);
180 D[4] = camellia_s4(D[4]);
182 D[3] = camellia_s2(D[3]);
183 D[2] = camellia_s3(D[2]);
184 D[1] = camellia_s4(D[1]);
185 D[0] = camellia_s1(D[0]); // LSB
190 ; d: r18-r25 (r18 is LSB)
192 movw r26, r24 ; backup r24,r25 -> X
229 ;##############################################################################
230 /* uint64_t camellia_p(uint64_t d) */
231 ; param: r18-r25 (r18 is LSB)
262 movw r30, z6 ; backup z5 bis z8
270 ;##############################################################################
272 /* uint64_t camellia_f(uint64_t x, uint64_t k) */
289 ;##############################################################################
291 /* uint64_t camellia_fl(uint64_t x, uint64_t k) */
292 ; param x: r18-r25 xl: r22-r25, xr: r18-r21
293 ; param k: r10-r17 kl: r14-r17, kr: r10-r13
337 ;##############################################################################
339 /* uint64_t camellia_fl_inv(uint64_t y, uint64_t k) */
340 ; param y: r18-r25 yl: r22-r25, yr: r18-r21
341 ; param k: r10-r17 kl: r14-r17, kr: r10-r13
358 .global camellia_fl_inv
368 // the first one is done
385 ;##############################################################################
390 .global camellia128_keyop_rot15
391 camellia128_keyop_rot15:
392 movw r30, r24 ; Z points at LSB of kl ;-- 0
394 2: adiw r30, 15 ;-- 15
397 movw B1, r20 ; store Backup of the 2 MSB of kl
401 1: ld r20, -Z ;-- 13..0
403 std Z+2, r20 ;-- (15..2)
417 ;##############################################################################
420 .global camellia128_keyop_rot17
421 camellia128_keyop_rot17:
482 ;##############################################################################
485 .global camellia128_keyop
488 breq camellia128_keyop_rot17
489 rjmp camellia128_keyop_rot15
491 ;##############################################################################
496 .global camellia128_keyop_inv_rot15
497 camellia128_keyop_inv_rot15:
498 movw r30, r24 ; Z points at LSB of kl ;-- 0
499 movw r26, r24 ; X also
504 movw B1, r20 ; store Backup of the 2 LSB of kl
508 1: ld r21, Z+ ;-- 2/14..3/16
510 st X+, r21 ;-- (0..13)/(1..14)
523 ;##############################################################################
526 .global camellia128_keyop_inv_rot17
527 camellia128_keyop_inv_rot17:
589 ;##############################################################################
592 .global camellia128_keyop_inv
593 camellia128_keyop_inv:
595 breq camellia128_keyop_inv_rot17
596 rjmp camellia128_keyop_inv_rot15
598 ;##############################################################################
599 ; param p: r24-r25 pointer to data
600 ; param l: r22 length of word
601 .global change_endian
617 ;##############################################################################
621 #define KEY_POSTC1 0x00
622 #define KEY_POSTC2 0x01
623 #define KEY_INC2 0x02
625 #define KEY_DIR_NORM 0x00
626 #define KEY_DIR_INV 0x04
627 #define KEY_AMMOUNT 0x08
628 #define KEY_ROL17 0x08
629 #define KEY_ROL15 0x00
631 void camellia_6rounds(camellia128_ctx_t* s, uint64_t* bl, uint64_t* br, uint8_t roundop, uint8_t keychoice){
638 for(i=0; i<3; ++i){ / * each cycle * /
639 br[0] ^= camellia_f(bl[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?1:0)]));
642 if((i == 1) && (roundop&KEY_INC2)){
643 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
646 bl[0] ^= camellia_f(br[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?0:1)]));
649 / * check if we should do some keyop * /
650 if((i == (roundop&1)) && (!(roundop&KEY_INC2)) ){
651 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
652 / * note: C lets us pick the key-operation function with ?: and call the result directly * /
661 ; param keychoice: r16
680 .global camellia_6rounds
703 rol keyop_time // keyop_time == 3
704 SBRC xro, 1 // KEY_INC2
706 SBRS xro, 0 // KEY_POSTC1
708 SBRS xro, 0 // KEY_POSTC1
714 /* now we load the key to r18-r25 */
716 SBRC kc, 0 /* select between KA and KL */
718 SBRC xro_sec, 2 // KEY_DIR
720 SBRS loop_cnt, 0 /* enc */
723 2: SBRC loop_cnt, 0 /* dec */
736 /* now we xor bl in */
754 /* f(x,k) = p(s(x xor k)) ; xor is done */
762 /* now we have to xor the result into br */
766 ; ldi r1, 8 ;-- this won't work: ldi only accepts the upper registers r16..r31
778 /* check for keyop */
779 cp loop_cnt, keyop_time
783 SBRS xro_sec, 3 // KEY_ROL17
785 SBRS xro_sec, 2 // KEY_DIR
787 call camellia128_keyop_inv
789 2: call camellia128_keyop
791 SWAP_R br1_sec, bl1_sec
792 SWAP_R br2_sec, bl2_sec
810 ;##############################################################################
812 void camellia128_init(camellia128_ctx_t* s, uint8_t* key){
814 s->kll = 0; //((uint64_t*)key)[0];
816 / * load the key, endian-adjusted, to kll,klr * /
829 s->kar ^= camellia_f(s->kal, camellia_sigma[0]);
830 s->kal ^= camellia_f(s->kar, camellia_sigma[1]);
835 s->kar ^= camellia_f(s->kal, camellia_sigma[2]);
836 s->kal ^= camellia_f(s->kar, camellia_sigma[3]);
838 // uart_putstr("\n\r----------------init finished--------------------");
901 //.global camellia128_init
905 movw r30, r24 ; Z is statepointer
906 movw r26, r22 ; X is keypointer
909 // / * load key into kl, ka and kal to r18:r25 * /
910 adiw r26, 128/8 ;-- 16
916 st Y+, r17 ; this should only be done the last 8 rounds 0<=r16<=7
920 ldi r26, lo8(camellia_sigma)
921 ldi r27, hi8(camellia_sigma)
924 call camellia_p // / * f(x,k) is done * /
926 movw r28, r30 ; Z&Y point on kar now
927 call Y64_load_xor_store
929 // / * step 2 now * /
932 call camellia_p // / * f(x,k) is done * /
933 call Y64_load_xor_store
935 // / * now the xor part (kl and kr) * /
936 sbiw r30, 128/8 ; Z points to klr
945 // / * now s->kar ^= camellia_f(s->kal, camellia_sigma[2]); * /
946 call X64_load ; load sigma[2]
947 movw r26, r28 ; X&Y point at kal
951 sbiw r28, 128/8/2 ; Y points at kar
952 call Y64_load_xor_store
954 // / * now s->kal ^= camellia_f(s->kar, camellia_sigma[3]); * /
956 call X64_load ; load kar
957 ldi r26, lo8(camellia_sigma+3*8)
958 ldi r27, hi8(camellia_sigma+3*8)
959 call X64_xor_in ; xor sigma[3] in
962 call Y64_load_xor_store