3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * File: camellia-asm.S
23 * License: GPLv3 or later
24 * Description: Implementation of the camellia block cipher algorithm.
35 /* push r18 - r27, r30 - r31*/
100 /* X points to Block */
101 .macro dbg_hexdump length
; S-box lookup table: 16 rows of 16 bytes = 256 entries.
; The lookup code in this file loads the address of camellia_sbox into Z, so
; this is presumably the data behind that label (the label line itself is not
; visible here -- TODO confirm).
; The values agree with SBOX1 of the Camellia specification (RFC 3713).
114 .byte 112, 130, 44, 236, 179, 39, 192, 229, 228, 133, 87, 53, 234, 12, 174, 65
115 .byte 35, 239, 107, 147, 69, 25, 165, 33, 237, 14, 79, 78, 29, 101, 146, 189
116 .byte 134, 184, 175, 143, 124, 235, 31, 206, 62, 48, 220, 95, 94, 197, 11, 26
117 .byte 166, 225, 57, 202, 213, 71, 93, 61, 217, 1, 90, 214, 81, 86, 108, 77
118 .byte 139, 13, 154, 102, 251, 204, 176, 45, 116, 18, 43, 32, 240, 177, 132, 153
119 .byte 223, 76, 203, 194, 52, 126, 118, 5, 109, 183, 169, 49, 209, 23, 4, 215
120 .byte 20, 88, 58, 97, 222, 27, 17, 28, 50, 15, 156, 22, 83, 24, 242, 34
121 .byte 254, 68, 207, 178, 195, 181, 122, 145, 36, 8, 232, 168, 96, 252, 105, 80
122 .byte 170, 208, 160, 125, 161, 137, 98, 151, 84, 91, 30, 149, 224, 255, 100, 210
123 .byte 16, 196, 0, 72, 163, 247, 117, 219, 138, 3, 230, 218, 9, 63, 221, 148
124 .byte 135, 92, 131, 2, 205, 74, 144, 51, 115, 103, 246, 243, 157, 127, 191, 226
125 .byte 82, 155, 216, 38, 200, 55, 198, 59, 129, 150, 111, 75, 19, 190, 99, 46
126 .byte 233, 121, 167, 140, 159, 110, 188, 142, 41, 245, 249, 182, 47, 253, 180, 89
127 .byte 120, 152, 6, 106, 231, 70, 113, 186, 212, 37, 171, 66, 136, 162, 141, 250
128 .byte 114, 7, 185, 85, 248, 238, 172, 10, 54, 73, 42, 104, 60, 56, 241, 164
129 .byte 64, 40, 211, 123, 187, 201, 67, 193, 21, 227, 173, 244, 119, 199, 128, 158
131 //.global camellia_sigma
; Six 64-bit key-schedule constants, 8 bytes per entry: the init code addresses
; entry 3 as camellia_sigma+3*8. The values agree with Sigma1..Sigma6 of the
; Camellia specification (RFC 3713).
; NOTE(review): the camellia_sigma label line is not visible here; the export
; above is commented out, so the symbol is presumably file-local -- confirm.
134 .quad 0xA09E667F3BCC908B
135 .quad 0xB67AE8584CAA73B2
136 .quad 0xC6EF372FE94F82BE
137 .quad 0x54FF53A5F1D36F1C
138 .quad 0x10E527FADE682D1D
139 .quad 0xB05688C2B3E6C1FD
144 /* uint8_t camellia_s1(uint8_t b) */
; Each ldi pair below sets Z (r31:r30) to the address of camellia_sbox.
; The same load appears four times; presumably once per S-box helper
; (camellia_s1 .. camellia_s4, as called by the C reference for camellia_s),
; but only the s1 prototype is visible here -- TODO confirm.
147 ldi r30, lo8(camellia_sbox) ; ZL = low byte of the table address
148 ldi r31, hi8(camellia_sbox) ; ZH = high byte of the table address
157 ldi r30, lo8(camellia_sbox) ; second occurrence: Z = camellia_sbox
158 ldi r31, hi8(camellia_sbox)
169 ldi r30, lo8(camellia_sbox) ; third occurrence: Z = camellia_sbox
170 ldi r31, hi8(camellia_sbox)
182 ldi r30, lo8(camellia_sbox) ; fourth occurrence: Z = camellia_sbox
183 ldi r31, hi8(camellia_sbox)
193 /* uint64_t camellia_s(uint64_t d){
194 #define D ((uint8_t*)(&d))
195 D[7] = camellia_s1(D[7]); // MSB
196 D[6] = camellia_s2(D[6]);
197 D[5] = camellia_s3(D[5]);
198 D[4] = camellia_s4(D[4]);
200 D[3] = camellia_s2(D[3]);
201 D[2] = camellia_s3(D[2]);
202 D[1] = camellia_s4(D[1]);
203 D[0] = camellia_s1(D[0]); // LSB
208 ; d: r18-r25 (r18 is LSB)
210 movw r26, r24 ; backup r24,r25 -> X
247 ;##############################################################################
248 /* uint64_t camellia_p(uint64_t d) */
249 ; param: r18-r25 (r18 is LSB)
280 movw r30, z6 ; back up z5 through z8 (this movw copies the register pair at z6 into r31:r30)
288 ;##############################################################################
290 /* uint64_t camellia_f(uint64_t x, uint64_t k) */
307 ;##############################################################################
309 /* uint64_t camellia_fl(uint64_t x, uint64_t k) */
310 ; param x: r18-r25 xl: r22-r25, xr: r18-r21
311 ; param k: r10-r17 kl: r14-r17, kr: r10-r13
355 ;##############################################################################
357 /* uint64_t camellia_fl_inv(uint64_t y, uint64_t k) */
358 ; param y: r18-r25 yl: r22-r25, yr: r18-r21
359 ; param k: r10-r17 kl: r14-r17, kr: r10-r13
376 .global camellia_fl_inv
386 // the first one is done
403 ;##############################################################################
408 .global camellia128_keyop_rot15
409 camellia128_keyop_rot15:
410 movw r30, r24 ; Z points at LSB of kl ;-- 0
412 2: adiw r30, 15 ;-- 15
415 movw B1, r20 ; store Backup of the 2 MSB of kl
419 1: ld r20, -Z ;-- 13..0
421 std Z+2, r20 ;-- (15..2)
435 ;##############################################################################
438 .global camellia128_keyop_rot17
439 camellia128_keyop_rot17:
500 ;##############################################################################
503 .global camellia128_keyop
506 breq camellia128_keyop_rot17
507 rjmp camellia128_keyop_rot15
509 ;##############################################################################
514 .global camellia128_keyop_inv_rot15
515 camellia128_keyop_inv_rot15:
516 movw r30, r24 ; Z points at LSB of kl ;-- 0
517 movw r26, r24 ; X also
522 movw B1, r20 ; store Backup of the 2 LSB of kl
526 1: ld r21, Z+ ;-- 2/14..3/16
528 st X+, r21 ;-- (0..13)/(1..14)
541 ;##############################################################################
544 .global camellia128_keyop_inv_rot17
545 camellia128_keyop_inv_rot17:
607 ;##############################################################################
610 .global camellia128_keyop_inv
611 camellia128_keyop_inv:
613 breq camellia128_keyop_inv_rot17
614 rjmp camellia128_keyop_inv_rot15
616 ;##############################################################################
617 ; param p: r24-r25 pointer to data
618 ; param l: r22 length of word
619 .global change_endian
635 ;##############################################################################
// Flag values for the roundop argument of camellia_6rounds.
// Bit numbers as tested by the assembly implementation:
//   bit 0 (0x01): KEY_POSTC1 / KEY_POSTC2 select
//   bit 1 (0x02): KEY_INC2
//   bit 2 (0x04): key direction (tested under the name KEY_DIR)
//   bit 3 (0x08): rotation amount (KEY_ROL17 when set, KEY_ROL15 when clear)
// NOTE(review): the C reference code uses a KEY_DIR mask that is not defined in
// the visible lines; presumably it equals KEY_DIR_INV (0x04) -- confirm.
639 #define KEY_POSTC1 0x00
640 #define KEY_POSTC2 0x01
641 #define KEY_INC2 0x02
643 #define KEY_DIR_NORM 0x00
644 #define KEY_DIR_INV 0x04
// KEY_AMMOUNT is the mask for the rotation-amount bit; it has the same value as KEY_ROL17.
645 #define KEY_AMMOUNT 0x08
646 #define KEY_ROL17 0x08
647 #define KEY_ROL15 0x00
649 void camellia_6rounds(camellia128_ctx_t *s, uint64_t *bl, uint64_t *br, uint8_t roundop, uint8_t keychoice){
656 for(i=0; i<3; ++i){ / * each cycle * /
657 br[0] ^= camellia_f(bl[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?1:0)]));
660 if((i == 1) && (roundop&KEY_INC2)){
661 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
664 bl[0] ^= camellia_f(br[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?0:1)]));
667 / * check if we should do some keyop * /
668 if((i == (roundop&1)) && (!(roundop&KEY_INC2)) ){
669 ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
670 / * a conditional expression selects which key-operation function gets called * /
679 ; param keychoice: r16
698 .global camellia_6rounds
721 rol keyop_time // keyop_time == 3
722 SBRC xro, 1 // KEY_INC2
724 SBRS xro, 0 // KEY_POSTC1
726 SBRS xro, 0 // KEY_POSTC1
732 /* now we load the key to r18-r25 */
734 SBRC kc, 0 /* select between KA and KL */
736 SBRC xro_sec, 2 // KEY_DIR
738 SBRS loop_cnt, 0 /* enc */
741 2: SBRC loop_cnt, 0 /* dec */
754 /* now we xor bl in */
772 /* f(x,k) = p(s(x xor k)) ; xor is done */
780 /* now we have to xor the result into br */
784 ; ldi r1, 8 ;-- this won't work: ldi only accepts r16..r31
796 /* check for keyop */
797 cp loop_cnt, keyop_time
801 SBRS xro_sec, 3 // KEY_ROL17
803 SBRS xro_sec, 2 // KEY_DIR
805 rcall camellia128_keyop_inv
807 2: rcall camellia128_keyop
809 SWAP_R br1_sec, bl1_sec
810 SWAP_R br2_sec, bl2_sec
828 ;##############################################################################
830 void camellia128_init(camellia128_ctx_t *s, uint8_t *key){
832 s->kll = 0; //((uint64_t*)key)[0];
834 / * load the key, endian-adjusted, to kll,klr * /
847 s->kar ^= camellia_f(s->kal, camellia_sigma[0]);
848 s->kal ^= camellia_f(s->kar, camellia_sigma[1]);
853 s->kar ^= camellia_f(s->kal, camellia_sigma[2]);
854 s->kal ^= camellia_f(s->kar, camellia_sigma[3]);
856 // uart_putstr("\n\r----------------init finished--------------------");
919 //.global camellia128_init
; camellia128_init key setup (only part of the routine is visible here).
; Pointer roles established by the first two instructions:
;   Z (r31:r30) = state pointer, taken from r25:r24
;   X (r27:r26) = key pointer,   taken from r23:r22
; Every call to Y64_load_xor_store uses rcall, so the routine also assembles
; for AVR parts that lack the two-word call instruction.
923 movw r30, r24 ; Z is statepointer
924 movw r26, r22 ; X is keypointer
927 // / * load key into kl, ka and kal to r18:r25 * /
928 adiw r26, 128/8 ;-- 16
934 st Y+, r17 ; this should only be done the last 8 rounds 0<=r16<=7
938 ldi r26, lo8(camellia_sigma) ; X = address of sigma[0]
939 ldi r27, hi8(camellia_sigma)
942 rcall camellia_p // / * f(x,k) is done * /
944 movw r28, r30 ; Z&Y point on kar now
945 rcall Y64_load_xor_store ; relative call, same as all other call sites of this helper
947 // / * step 2 now * /
950 rcall camellia_p // / * f(x,k) is done * /
951 rcall Y64_load_xor_store
953 // / * now the xor part (kl and kr) * /
954 sbiw r30, 128/8 ; Z points to klr
963 // / * now s->kar ^= camellia_f(s->kal, camellia_sigma[2]); * /
964 rcall X64_load ; load sigma[2]
965 movw r26, r28 ; X&Y point at kal
969 sbiw r28, 128/8/2 ; Y points at kar
970 rcall Y64_load_xor_store
972 // / * now s->kal ^= camellia_f(s->kar, camellia_sigma[3]); * /
974 rcall X64_load ; load kar
975 ldi r26, lo8(camellia_sigma+3*8) ; X = address of sigma[3] (8 bytes per entry)
976 ldi r27, hi8(camellia_sigma+3*8)
977 rcall X64_xor_in ; xor sigma[3] in
980 rcall Y64_load_xor_store