+++ /dev/null
-/* camellia-asm.S */
-/*
- This file is part of the AVR-Crypto-Lib.
- Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * File: camellia-asm.S
- * Author: Daniel Otte
- * Date: 2006-11-10
- * License: GPLv3 or later
- * Description: Implementation of the camellia block cipher algorithm.
- *
- */
-
-/*
- * SWAP_R A, B -- exchange the contents of two registers with three XORs,
- * so that no scratch register is required.
- * The two operands must be different registers: with identical operands
- * the first eor would clear the register. The status flags are modified.
- */
-.macro SWAP_R A, B
- eor \A, \B
- eor \B, \A
- eor \A, \B
-.endm
-
-/*
- * precall -- push r0, r1, r18-r27 and r30-r31 on the stack and clear r1.
- * Used together with postcall (which pops in the reverse order) to wrap a
- * call into external routines from the middle of the assembly code.
- * NOTE(review): the saved set looks like the registers a C callee may
- * clobber, and r1 is presumably cleared because C code expects it to be
- * zero -- confirm against the avr-gcc ABI.
- */
-.macro precall
- /* push r0, r1, r18 - r27, r30 - r31 */
- push r0
- push r1
- push r18
- push r19
- push r20
- push r21
- push r22
- push r23
- push r24
- push r25
- push r26
- push r27
- push r30
- push r31
- clr r1
-.endm
-
-/*
- * postcall -- counterpart of precall: restore r31, r30, r27-r18, r1 and r0
- * from the stack, in exactly the reverse of the order precall pushed them.
- */
-.macro postcall
- pop r31
- pop r30
- pop r27
- pop r26
- pop r25
- pop r24
- pop r23
- pop r22
- pop r21
- pop r20
- pop r19
- pop r18
- pop r1
- pop r0
-.endm
-
-
-/*
- * hexdump length -- debug helper. Sends CR and LF through uart_putc and
- * then passes the buffer X points to on to uart_hexdump, at most 16 bytes
- * per call. For length > 16 the macro dumps 16 bytes, advances X by 16 and
- * expands itself again with length-16.
- * uart_putc and uart_hexdump are external routines; the argument layout
- * used here is r24 = character for uart_putc, and r24:r25 = pointer,
- * r22:r23 = byte count for uart_hexdump.
- * X is saved around every external call except the final uart_hexdump.
- * No other register is preserved (see dbg_hexdump for a safe wrapper).
- */
-.macro hexdump length
- push r27
- push r26
- ldi r25, '\r'
- mov r24, r25
- call uart_putc
- ldi r25, '\n'
- mov r24, r25
- call uart_putc
- pop r26
- pop r27
- movw r24, r26
-.if \length > 16
- ldi r22, lo8(16)
- ldi r23, hi8(16)
- push r27
- push r26
- call uart_hexdump
- pop r26
- pop r27
- adiw r26, 16
- hexdump \length-16
-.else
- ldi r22, lo8(\length)
- ldi r23, hi8(\length)
- call uart_hexdump
-.endif
-.endm
-
-/*
- * dbg_hexdump length -- dump length bytes starting at the address in X.
- * Wraps the hexdump macro in precall/postcall, so r0, r1, r18-r27 and
- * r30-r31 hold their previous values again afterwards.
- */
-.macro dbg_hexdump length
- precall
- hexdump \length
- postcall
-.endm
-
-/*
- * SPL, SPH, SREG: numeric addresses, only referenced from commented-out
- * debug code in this file (in r26, SPL / in r27, SPH).
- * NOTE(review): presumably the I/O addresses of the stack pointer and the
- * status register -- confirm for the target device.
- * NULLr: number of the register that is expected to hold zero (r1); it is
- * used as the zero operand of adc in the S-box routines.
- */
-SPL = 0x3D
-SPH = 0x3E
-SREG = 0x3F
-NULLr = 1
-
-
-/*
- * camellia_sbox -- 256-entry byte table indexed by the input byte.
- * camellia_s1 returns the table entry directly; camellia_s2, camellia_s3
- * and camellia_s4 derive their results from the same table by rotating the
- * output or the input by one bit. The table is read with lpm, i.e. it has
- * to be located in program memory.
- */
-camellia_sbox:
-.byte 112, 130, 44, 236, 179, 39, 192, 229, 228, 133, 87, 53, 234, 12, 174, 65
-.byte 35, 239, 107, 147, 69, 25, 165, 33, 237, 14, 79, 78, 29, 101, 146, 189
-.byte 134, 184, 175, 143, 124, 235, 31, 206, 62, 48, 220, 95, 94, 197, 11, 26
-.byte 166, 225, 57, 202, 213, 71, 93, 61, 217, 1, 90, 214, 81, 86, 108, 77
-.byte 139, 13, 154, 102, 251, 204, 176, 45, 116, 18, 43, 32, 240, 177, 132, 153
-.byte 223, 76, 203, 194, 52, 126, 118, 5, 109, 183, 169, 49, 209, 23, 4, 215
-.byte 20, 88, 58, 97, 222, 27, 17, 28, 50, 15, 156, 22, 83, 24, 242, 34
-.byte 254, 68, 207, 178, 195, 181, 122, 145, 36, 8, 232, 168, 96, 252, 105, 80
-.byte 170, 208, 160, 125, 161, 137, 98, 151, 84, 91, 30, 149, 224, 255, 100, 210
-.byte 16, 196, 0, 72, 163, 247, 117, 219, 138, 3, 230, 218, 9, 63, 221, 148
-.byte 135, 92, 131, 2, 205, 74, 144, 51, 115, 103, 246, 243, 157, 127, 191, 226
-.byte 82, 155, 216, 38, 200, 55, 198, 59, 129, 150, 111, 75, 19, 190, 99, 46
-.byte 233, 121, 167, 140, 159, 110, 188, 142, 41, 245, 249, 182, 47, 253, 180, 89
-.byte 120, 152, 6, 106, 231, 70, 113, 186, 212, 37, 171, 66, 136, 162, 141, 250
-.byte 114, 7, 185, 85, 248, 238, 172, 10, 54, 73, 42, 104, 60, 56, 241, 164
-.byte 64, 40, 211, 123, 187, 201, 67, 193, 21, 227, 173, 244, 119, 199, 128, 158
-
-//.global camellia_sigma
-/*
-camellia_sigma:
-.quad 0xA09E667F3BCC908B
-.quad 0xB67AE8584CAA73B2
-.quad 0xC6EF372FE94F82BE
-.quad 0x54FF53A5F1D36F1C
-.quad 0x10E527FADE682D1D
-.quad 0xB05688C2B3E6C1FD
-*/
-
-
-
-/* uint8_t camellia_s1(uint8_t b) */
-/*
- * Returns camellia_sbox[b].
- * in:  r24 = b; r1 (NULLr) must be zero
- * out: r24 = table entry, r25 = 0
- * clobbers: r30, r31 (Z), flags
- */
-.global camellia_s1
-camellia_s1:
- ldi r30, lo8(camellia_sbox)
- ldi r31, hi8(camellia_sbox)
- add r30, r24 ; Z = &camellia_sbox[b]
- adc r31, NULLr ; propagate the carry into the high byte
- lpm r24, Z
- clr r25
- ret
-
-/*
- * uint8_t camellia_s2(uint8_t b)
- * Returns camellia_sbox[b] rotated left by one bit.
- * in:  r24 = b; r1 (NULLr) must be zero
- * out: r24 = result, r25 = 0
- * clobbers: r30, r31 (Z), flags
- */
-.global camellia_s2
-camellia_s2:
- ldi r30, lo8(camellia_sbox)
- ldi r31, hi8(camellia_sbox)
- add r30, r24
- adc r31, NULLr
- lpm r24, Z
- lsl r24 ; bit 7 goes to carry ...
- adc r24, NULLr ; ... and is added back in as bit 0
- clr r25
- ret
-
-/*
- * uint8_t camellia_s3(uint8_t b)
- * Returns camellia_sbox[b] rotated right by one bit.
- * in:  r24 = b; r1 (NULLr) must be zero
- * out: r24 = result, r25 = 0
- * clobbers: r30, r31 (Z), flags including T
- */
-.global camellia_s3
-camellia_s3:
- ldi r30, lo8(camellia_sbox)
- ldi r31, hi8(camellia_sbox)
- add r30, r24
- adc r31, NULLr
- lpm r24, Z
- bst r24, 0 ; remember bit 0 in T
- lsr r24
- bld r24, 7 ; and reinsert it as bit 7
- clr r25
- ret
-
-/*
- * uint8_t camellia_s4(uint8_t b)
- * Returns camellia_sbox[b rotated left by one bit]; here the rotation is
- * applied to the index before the table lookup.
- * in:  r24 = b; r1 (NULLr) must be zero
- * out: r24 = result, r25 = 0
- * clobbers: r30, r31 (Z), flags
- */
-.global camellia_s4
-camellia_s4:
- ldi r30, lo8(camellia_sbox)
- ldi r31, hi8(camellia_sbox)
- lsl r24 ; rotate the index left by one:
- adc r24, NULLr ; carry (old bit 7) becomes bit 0
- add r30, r24
- adc r31, NULLr
- lpm r24, Z
- clr r25
- ret
-
-.global camellia_s
-/* uint64_t camellia_s(uint64_t d){
- #define D ((uint8_t*)(&d))
- D[7] = camellia_s1(D[7]); // MSB
- D[6] = camellia_s2(D[6]);
- D[5] = camellia_s3(D[5]);
- D[4] = camellia_s4(D[4]);
-
- D[3] = camellia_s2(D[3]);
- D[2] = camellia_s3(D[2]);
- D[1] = camellia_s4(D[1]);
- D[0] = camellia_s1(D[0]); // LSB
- #undef D
- return d;
-}*/
-; parameters
-; d: r18-r25 (r18 is LSB)
-; result: r18-r25, every byte replaced by its S-box value as listed above
-; clobbers: r26, r27 (X), r30, r31 (Z), flags including T
-; The byte routines take their argument in r24 and clear r25, so the two
-; top bytes are parked in X while the other bytes are processed.
-camellia_s:
- movw r26, r24 ; backup r24,r25 -> X
- clr r25
- rcall camellia_s2 ; r24 still holds D[6]
- mov r26, r24
-
- mov r24, r27 ; D[7]
- rcall camellia_s1
- mov r27, r24
-
- mov r24, r23 ; D[5]
- rcall camellia_s3
- mov r23, r24
-
- mov r24, r22 ; D[4]
- rcall camellia_s4
- mov r22, r24
-
- mov r24, r21 ; D[3]
- rcall camellia_s2
- mov r21, r24
-
- mov r24, r20 ; D[2]
- rcall camellia_s3
- mov r20, r24
-
- mov r24, r19 ; D[1]
- rcall camellia_s4
- mov r19, r24
-
-
- mov r24, r18 ; D[0]
- rcall camellia_s1
- mov r18, r24
-
- movw r24, r26 ; put the substituted D[6], D[7] back
- ret
-
-;##############################################################################
-/* uint64_t camellia_p(uint64_t d) */
-; param: r18-r25 (r18 is LSB)
-; result: r18-r25
-; clobbers: r26, r27 (X), r30, r31 (Z), flags
-; Byte diffusion step: two groups of eight in-place XORs between the bytes
-; z1..z8, followed by an exchange of the lower and the upper four bytes
-; (r18-r21 <-> r22-r25) done pairwise with movw.
-; z1 is the most significant byte (r25), z8 the least significant (r18).
-z1 = 25
-z2 = 24
-z3 = 23
-z4 = 22
-z5 = 21
-z6 = 20
-z7 = 19
-z8 = 18
-
-.global camellia_p
-camellia_p:
- eor z1, z6
- eor z2, z7
- eor z3, z8
- eor z4, z5
- eor z5, z3
- eor z6, z4
- eor z7, z1
- eor z8, z2
- ;---------
- eor z1, z8
- eor z2, z5
- eor z3, z6
- eor z4, z7
- eor z5, z4
- eor z6, z1
- eor z7, z2
- eor z8, z3
- ;---------
- movw r26, z8 ; X = r18:r19 (z8, z7)
- movw r30, z6 ; backup z5 to z8 (Z = r20:r21)
- movw z8, z4 ; r18:r19 = r22:r23
- movw z6, z2 ; r20:r21 = r24:r25
- movw z4, r26 ; r22:r23 = old r18:r19
- movw z2, r30 ; r24:r25 = old r20:r21
- ret
-
-
-;##############################################################################
-
-/* uint64_t camellia_f(uint64_t x, uint64_t k) */
-; param x: r18-r25
-; param k: r10-r17
-; result: r18-r25 = camellia_p(camellia_s(x xor k))
-; k (r10-r17) is only read.
-; clobbers: r26, r27, r30, r31, flags (through camellia_s and camellia_p)
-.global camellia_f
-camellia_f:
- eor r18, r10
- eor r19, r11
- eor r20, r12
- eor r21, r13
- eor r22, r14
- eor r23, r15
- eor r24, r16
- eor r25, r17
- rcall camellia_s
- rcall camellia_p
- ret
-
-;##############################################################################
-
-/* uint64_t camellia_fl(uint64_t x, uint64_t k) */
-/*
- * Camellia FL function on the two 32-bit halves of x and k:
- *   xr ^= rol32(xl & kl, 1);
- *   xl ^= (xr | kr);
- * result: r18-r25 (xl in r22-r25, xr in r18-r21)
- * The key registers r10-r17 are only read. They are call-saved registers
- * in the avr-gcc ABI even when they carry an argument, so all intermediate
- * values are built in the scratch registers r0, r26, r27, r30 and r31.
- * clobbers: r0, r26, r27, r30, r31, flags
- */
-; param x: r18-r25 xl: r22-r25, xr: r18-r21
-; param k: r10-r17 kl: r14-r17, kr: r10-r13
-kl1 = 14
-kl2 = 15
-kl3 = 16
-kl4 = 17
-kr1 = 10
-kr2 = 11
-kr3 = 12
-kr4 = 13
-xr1 = 18
-xr2 = 19
-xr3 = 20
-xr4 = 21
-xl1 = 22
-xl2 = 23
-xl3 = 24
-xl4 = 25
-.global camellia_fl
-camellia_fl:
- /* t = xl & kl in r26 (LSB), r27, r30, r31 (MSB) */
- mov r26, kl1
- and r26, xl1
- mov r27, kl2
- and r27, xl2
- mov r30, kl3
- and r30, xl3
- mov r31, kl4
- and r31, xl4
- /* rotate t left by one bit: bit 31 is moved to carry first */
- mov r0, r31
- lsl r0
- rol r26
- rol r27
- rol r30
- rol r31
- eor xr1, r26
- eor xr2, r27
- eor xr3, r30
- eor xr4, r31
- // that was part one
- /* xl ^= (xr | kr), one byte at a time through r26 */
- mov r26, kr1
- or r26, xr1
- eor xl1, r26
- mov r26, kr2
- or r26, xr2
- eor xl2, r26
- mov r26, kr3
- or r26, xr3
- eor xl3, r26
- mov r26, kr4
- or r26, xr4
- eor xl4, r26
- ret
-
-;##############################################################################
-
-/* uint64_t camellia_fl_inv(uint64_t y, uint64_t k) */
-/*
- * Inverse of the Camellia FL function:
- *   yl ^= (yr | kr);
- *   yr ^= rol32(yl & kl, 1);
- * result: r18-r25 (yl in r22-r25, yr in r18-r21)
- * The key registers r10-r17 are only read. They are call-saved registers
- * in the avr-gcc ABI even when they carry an argument, so all intermediate
- * values are built in the scratch registers r0, r26, r27, r30 and r31.
- * clobbers: r0, r26, r27, r30, r31, flags
- */
-; param y: r18-r25 yl: r22-r25, yr: r18-r21
-; param k: r10-r17 kl: r14-r17, kr: r10-r13
-kl1 = 14
-kl2 = 15
-kl3 = 16
-kl4 = 17
-kr1 = 10
-kr2 = 11
-kr3 = 12
-kr4 = 13
-yr1 = 18
-yr2 = 19
-yr3 = 20
-yr4 = 21
-yl1 = 22
-yl2 = 23
-yl3 = 24
-yl4 = 25
-.global camellia_fl_inv
-camellia_fl_inv:
- /* yl ^= (yr | kr), one byte at a time through r26 */
- mov r26, kr1
- or r26, yr1
- eor yl1, r26
- mov r26, kr2
- or r26, yr2
- eor yl2, r26
- mov r26, kr3
- or r26, yr3
- eor yl3, r26
- mov r26, kr4
- or r26, yr4
- eor yl4, r26
- // the first one is done
- /* t = yl & kl in r26 (LSB), r27, r30, r31 (MSB) */
- mov r26, kl1
- and r26, yl1
- mov r27, kl2
- and r27, yl2
- mov r30, kl3
- and r30, yl3
- mov r31, kl4
- and r31, yl4
- /* rotate t left by one bit: bit 31 is moved to carry first */
- mov r0, r31
- lsl r0
- rol r26
- rol r27
- rol r30
- rol r31
- eor yr1, r26
- eor yr2, r27
- eor yr3, r30
- eor yr4, r31
- ret
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Rotates two consecutive 128-bit values in memory, starting at s, left by
-; 15 bits each. Byte 0 of each value is the least significant byte.
-; Method: every byte is shifted right by one bit (carry chained from the
-; more significant neighbour) and stored two positions higher; the two top
-; bytes wrap around into bytes 0 and 1.
-; q is not read; r22 is reused as the counter for the two values.
-; clobbers: r18-r22, r30, r31 (Z), flags
-B1 = 18
-B2 = 19
-.global camellia128_keyop_rot15
-camellia128_keyop_rot15:
- movw r30, r24 ; Z points at LSB of kl ;-- 0
- ldi r22, 2
-2: adiw r30, 15 ;-- 15
- ld r21, Z
- ld r20, -Z ;-- 14
- movw B1, r20 ; store Backup of the 2 MSB of kl
- ror r20 ; only to get bit 0 of byte 14 into carry
-
- ldi r21, 14
-1: ld r20, -Z ;-- 13..0
- ror r20
- std Z+2, r20 ;-- (15..2)
- dec r21 ; dec leaves the carry untouched
- brne 1b
-
- ror B2 ; carry in: bit 0 of byte 0
- ror B1
- st Z+, B1 ;-- 1
- st Z, B2
- adiw r30, 15 ;-- 16
-
- dec r22
- brne 2b
- ret
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Rotates two consecutive 128-bit values in memory, starting at s, left by
-; 17 bits each. q is not read.
-; Method: the 16 bytes are rotated left by one bit while being copied
-; through X to data addresses 8..23, then registers r8..r23 are written
-; back shifted by two byte positions.
-; NOTE(review): this only works if data addresses 8..23 alias the
-; registers r8..r23 (memory-mapped register file) -- confirm for the
-; target core.
-; The T flag marks the second pass. r1 is used as the byte counter and is
-; zero again when the copy loop exits.
-; r8-r17 are saved and restored.
-; clobbers: r0, r18-r23, r26, r27, r30, r31, flags including T
-.global camellia128_keyop_rot17
-camellia128_keyop_rot17:
- push r8
- push r9
- push r10
- push r11
- push r12
- push r13
- push r14
- push r15
- push r16
- push r17
- clt
- movw r30, r24
- clr r27
-2: ldi r26, 8 ; X = 0x0008
- mov r1, r26
- lsl r1 ; r1=16
- ;push r1
- ; load 128bit value
- ldd r0, Z+15
- rol r0 ; carry = top bit of the most significant byte
-1: ld r0, Z+
- rol r0
- st X+, r0
- dec r1
- brne 1b
-
- st -Z, 21 ; byte 15 = rotated byte 13
- st -Z, 20
- st -Z, 19
- st -Z, 18
- st -Z, 17
- st -Z, 16
- st -Z, 15
- st -Z, 14 ;--
- st -Z, 13
- st -Z, 12
- st -Z, 11
- st -Z, 10
- st -Z, 9
- st -Z, 8 ; byte 2 = rotated byte 0
- st -Z, 23 ; byte 1 = rotated byte 15
- st -Z, 22 ; byte 0 = rotated byte 14
-
- brts 2f ; second value done
- set
- adiw r30, 16 ; advance to the second value
- rjmp 2b
-2:
- pop r17
- pop r16
- pop r15
- pop r14
- pop r13
- pop r12
- pop r11
- pop r10
- pop r9
- pop r8
- ret
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Dispatcher for the key rotation: q == 1 selects the rotation by 17 bits,
-; any other value the rotation by 15 bits. Both targets are entered with a
-; jump, so they return directly to the caller of this routine.
-.global camellia128_keyop
-camellia128_keyop:
- cpi r22, 1
- breq camellia128_keyop_rot17
- rjmp camellia128_keyop_rot15
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Inverse of camellia128_keyop_rot15: rotates two consecutive 128-bit
-; values in memory, starting at s, right by 15 bits each.
-; Method: every byte is shifted left by one bit (carry chained from the
-; less significant neighbour) and stored two positions lower; the two
-; lowest bytes wrap around into bytes 14 and 15.
-; Z is the read pointer, X the write pointer; both simply run on into the
-; second value. q is not read; r22 is reused as the counter.
-; clobbers: r18-r22, r26, r27 (X), r30, r31 (Z), flags
-B1 = 18
-B2 = 19
-.global camellia128_keyop_inv_rot15
-camellia128_keyop_inv_rot15:
- movw r30, r24 ; Z points at LSB of kl ;-- 0
- movw r26, r24 ; X also
- ldi r22, 2
-2: ;-- 0
- ld r20, Z+ ;-- 0/1
- ld r21, Z+ ;-- 1/2
- movw B1, r20 ; store Backup of the 2 LSB of kl
- rol r21 ; only to get bit 7 of byte 1 into carry
-
- ldi r20, 14
-1: ld r21, Z+ ;-- 2/14..3/16
- rol r21
- st X+, r21 ;-- (0..13)/(1..14)
- dec r20 ; dec leaves the carry untouched
- brne 1b
-
- rol B1 ; carry in: bit 7 of byte 15
- rol B2
- st X+, B1 ;-- 14/15
- st X+, B2 ;-- 15/16
-
- dec r22
- brne 2b
- ret
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Inverse of camellia128_keyop_rot17: rotates two consecutive 128-bit
-; values in memory, starting at s, right by 17 bits each. q is not read.
-; Method: the 16 bytes are read from the top down, rotated right by one
-; bit and copied through X to data addresses 8..23; then registers
-; r8..r23 are written back shifted by two byte positions.
-; NOTE(review): this only works if data addresses 8..23 alias the
-; registers r8..r23 (memory-mapped register file) -- confirm for the
-; target core.
-; The T flag marks the second pass. After the write-back Z already points
-; at the second value, so no pointer adjustment is needed between passes.
-; r1 is used as the byte counter and is zero again when the loop exits.
-; r8-r17 are saved and restored.
-; clobbers: r0, r18-r23, r26, r27, r30, r31, flags including T
-.global camellia128_keyop_inv_rot17
-camellia128_keyop_inv_rot17:
- push r8
- push r9
- push r10
- push r11
- push r12
- push r13
- push r14
- push r15
- push r16
- push r17
- clt
- movw r30, r24
- clr r27
-2: ldi r26, 8 ; X = 0x0008
- mov r1, r26
- lsl r1 ; r1=16
- ; load 128bit value
-
- ld r0, Z
- adiw r30, 16
- ror r0 ; carry = bit 0 of the least significant byte
-1: ld r0, -Z
- ror r0
- st X+, r0
- dec r1
- brne 1b
-
- st Z+, 21 ; byte 0 = rotated byte 2
- st Z+, 20
- st Z+, 19
- st Z+, 18
- st Z+, 17
- st Z+, 16
- st Z+, 15
- st Z+, 14 ;--
- st Z+, 13
- st Z+, 12
- st Z+, 11
- st Z+, 10
- st Z+, 9
- st Z+, 8 ; byte 13 = rotated byte 15
- st Z+, 23 ; byte 14 = rotated byte 0
- st Z+, 22 ; byte 15 = rotated byte 1
-
- brts 2f ; second value done
- set
-; adiw r30, 16
- rjmp 2b
-2:
- pop r17
- pop r16
- pop r15
- pop r14
- pop r13
- pop r12
- pop r11
- pop r10
- pop r9
- pop r8
- ret
-
-;##############################################################################
-; param s: r24-r25
-; param q: r22
-;
-; Dispatcher for the inverse key rotation: q == 1 selects the rotation by
-; 17 bits, any other value the rotation by 15 bits. Both targets are
-; entered with a jump, so they return directly to the caller.
-.global camellia128_keyop_inv
-camellia128_keyop_inv:
- cpi r22, 1
- breq camellia128_keyop_inv_rot17
- rjmp camellia128_keyop_inv_rot15
-
-;##############################################################################
-; param p: r24-r25 pointer to data
-; param l: r22 length of word
-;
-; Reverses the order of the l bytes at p in place by swapping the outermost
-; bytes and moving inwards. l/2 swaps are done, so the middle byte of an
-; odd-length word stays where it is.
-; For l < 2 there is nothing to swap and the routine returns immediately
-; (without this check the counter would wrap and 256 swaps would be done).
-; r1 must be zero. clobbers: r20-r22, r26, r27 (X), r30, r31 (Z), flags
-.global change_endian
-change_endian:
- movw r26, r24 ; X -> first byte
- movw r30, r24
- add r30, r22
- adc r31, r1 ; Z -> one past the last byte
- lsr r22 ; number of swaps = l / 2
- breq 2f ; l is 0 or 1: nothing to do
-1:
- ld r20, X
- ld r21, -Z
- st X+, r21
- st Z, r20
- dec r22
- brne 1b
-2:
- ret
-
-;##############################################################################
-
-/*
- * Bit values for the keychoice and roundop arguments of camellia_6rounds.
- * keychoice: one bit per half round, consumed from bit 0 upwards;
- *            SEL_KA (1) selects the KA key pair, SEL_KL (0) the KL pair.
- * roundop:   bit 0  KEY_POSTC1 / KEY_POSTC2  when the key rotation is done
- *            bit 1  KEY_INC2                 alternative rotation point
- *            bit 2  KEY_DIR                  KEY_DIR_NORM or KEY_DIR_INV
- *            bit 3  KEY_AMMOUNT              KEY_ROL15 or KEY_ROL17
- */
-#define SEL_KA 1
-#define SEL_KL 0
-#define KEY_POSTC1 0x00
-#define KEY_POSTC2 0x01
-#define KEY_INC2 0x02
-#define KEY_DIR 0x04
-#define KEY_DIR_NORM 0x00
-#define KEY_DIR_INV 0x04
-#define KEY_AMMOUNT 0x08
-#define KEY_ROL17 0x08
-#define KEY_ROL15 0x00
-/*
-void camellia_6rounds(camellia128_ctx_t* s, uint64_t* bl, uint64_t* br, uint8_t roundop, uint8_t keychoice){
- uint8_t i;
- uint64_t* k[4];
- k[0] = &(s->kll);
- k[1] = &(s->klr);
- k[2] = &(s->kal);
- k[3] = &(s->kar);
- for(i=0; i<3; ++i){ / * each cycle * /
- br[0] ^= camellia_f(bl[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?1:0)]));
- keychoice >>= 1;
-
- if((i == 1) && (roundop&KEY_INC2)){
- ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
- }
-
- bl[0] ^= camellia_f(br[0],*(k[(keychoice&1)*2+((roundop&KEY_DIR)?0:1)]));
- keychoice >>= 1;
-
- / * check if we should do some keyop * /
- if((i == (roundop&1)) && (!(roundop&KEY_INC2)) ){
- ((roundop&KEY_DIR)?camellia128_keyop_inv:camellia128_keyop)(s,(roundop&KEY_AMMOUNT)?1:-1);
- / * isn't it fuckin nice what we can do in C?! * /
- }
- }
-}
-*/
-; param s: r24-r25
-; param bl: r22-r23
-; param br: r20-r21
-; param roundop: r18
-; param keychoice: r16
-;
-; Register aliases for camellia_6rounds.
-; s, bl, br, xro, kc name the incoming argument registers. The *_sec names
-; are the call-saved copies that stay valid across the calls made inside
-; the loop; movw copies register pairs, so s1_sec stands for r14:r15,
-; bl1_sec for r12:r13 and br1_sec for r10:r11.
-; t is a scratch byte, loop_cnt counts the remaining half rounds and
-; keyop_time is the loop_cnt value at which the key rotation is done.
-s1 = 24
-s2 = 25
-bl1 = 22
-bl2 = 23
-br1 = 20
-br2 = 21
-xro = 18
-kc = 16
-xro_sec = 17
-br1_sec = 10
-br2_sec = 11
-bl1_sec = 12
-bl2_sec = 13
-s1_sec = 14
-t = 9
-loop_cnt = 8
-keyop_time = 7
-
-/*
- * camellia_6rounds -- six half rounds of the cipher on the two 64-bit
- * blocks bl and br, including one key rotation at a point selected by
- * roundop. Each half round computes
- *   br ^= camellia_p(camellia_s(bl ^ key))
- * and then exchanges the bl and br pointers.
- * in: r24:r25 = s, r22:r23 = bl, r20:r21 = br, r18 = roundop,
- *     r16 = keychoice (bit 0 is consumed first, one bit per half round)
- * r7-r17 are saved and restored; r0, r1, r18-r27, r30, r31 and the flags
- * are clobbered. r1 is used as a counter and is zero again at the end of
- * every half round.
- * NOTE(review): the result of camellia_p is read back through Z = 0x0012,
- * which only works if data addresses 18..25 alias r18..r25 (memory-mapped
- * register file) -- confirm for the target core.
- */
-.global camellia_6rounds
-camellia_6rounds:
- push r17
- push r16
- push r15
- push r14
- push r13
- push r12
- push r11
- push r10
- push r9
- push r8
- push r7
-
- ldi r17, 6
- mov loop_cnt, r17
- mov xro_sec, xro
- movw br1_sec, br1
- movw bl1_sec, bl1
- movw s1_sec, s1
- /* keyop_time: 4 with KEY_INC2, else 3 with KEY_POSTC2, else 5 */
- clr keyop_time
- inc keyop_time
- sec
- rol keyop_time // keyop_time == 3
- SBRC xro, 1 // KEY_INC2
- rjmp 1f
- SBRS xro, 0 // KEY_POSTC1
- inc keyop_time
- SBRS xro, 0 // KEY_POSTC1
- inc keyop_time
- rjmp 2f
-1: inc keyop_time
-2:
-main_loop:
- /* now we load the key to r18-r25 */
- /* X = s, +16 if the current keychoice bit is set, +8 depending on the
-    parity of loop_cnt and on KEY_DIR */
- movw r26, s1_sec
- SBRC kc, 0 /* select between KA and KL */
- adiw r26, 16
- SBRC xro_sec, 2 // KEY_DIR
- rjmp 2f
- SBRS loop_cnt, 0 /* enc */
- adiw r26, 8
- rjmp 3f
-2: SBRC loop_cnt, 0 /* dec */
- adiw r26, 8
- rjmp 3f
-3:
- lsr kc /* next keychoice bit for the following half round */
- ld r18, X+
- ld r19, X+
- ld r20, X+
- ld r21, X+
- ld r22, X+
- ld r23, X+
- ld r24, X+
- ld r25, X+
- /* now we xor bl in */
- movw r26, bl1_sec
- ld r0, X+
- eor r18, r0
- ld r0, X+
- eor r19, r0
- ld r0, X+
- eor r20, r0
- ld r0, X+
- eor r21, r0
- ld r0, X+
- eor r22, r0
- ld r0, X+
- eor r23, r0
- ld r0, X+
- eor r24, r0
- ld r0, X+
- eor r25, r0
- /* f(x,k) = p(s(x xor k)) ; xor is done */
- call camellia_s;
- call camellia_p;
-
-// in r26, SPL
-// in r27, SPH
-// sbiw r26, 9
-// dbg_hexdump 10
- /* now we have to xor the result into br */
- clr r31
- ldi r30, 18 /* Z = 0x0012, meant to address r18 */
- movw r26, br1_sec
-; ldi r1, 8 ;-- this won't work
- /* r1 = 8 without ldi: 0x00 -> 0x80 (ror with carry set) -> 0x08 (swap) */
- clr r1
- sec
- ror r1
- swap r1
-1: ld r0, X
- ld t, Z+
- eor r0, t
- st X+, r0
- dec r1
- brne 1b
-
- /* check for keyop */
- cp loop_cnt, keyop_time
- brne 3f
- movw s1, s1_sec
- ldi r22, 1 /* q = 1 (rotate by 17) if KEY_ROL17 is set, else -1 */
- SBRS xro_sec, 3 // KEY_ROL17
- neg r22
- SBRS xro_sec, 2 // KEY_DIR
- rjmp 2f
- rcall camellia128_keyop_inv
- rjmp 3f
-2: rcall camellia128_keyop
-3: /* loop back */
- /* exchange the bl and br pointers for the next half round */
- SWAP_R br1_sec, bl1_sec
- SWAP_R br2_sec, bl2_sec
- dec loop_cnt
- breq 2f
- rjmp main_loop
-2:
- pop r7
- pop r8
- pop r9
- pop r10
- pop r11
- pop r12
- pop r13
- pop r14
- pop r15
- pop r16
- pop r17
- ret
-
-;##############################################################################
-/*
-void camellia128_init(camellia128_ctx_t* s, uint8_t* key){
- uint8_t i;
- s->kll = 0; //((uint64_t*)key)[0];
-
- / * load the key, endian-adjusted, to kll,klr * /
- for(i=0; i<8; ++i){
- s->kll <<= 8;
- s->kll |= *key++;
- }
- for(i=0; i<8; ++i){
- s->klr <<= 8;
- s->klr |= *key++;
- }
-
- s->kal = s->kll;
- s->kar = s->klr;
-
- s->kar ^= camellia_f(s->kal, camellia_sigma[0]);
- s->kal ^= camellia_f(s->kar, camellia_sigma[1]);
-
- s->kal ^= s->kll;
- s->kar ^= s->klr;
-
- s->kar ^= camellia_f(s->kal, camellia_sigma[2]);
- s->kal ^= camellia_f(s->kar, camellia_sigma[3]);
- / * * /
-// uart_putstr("\n\r----------------init finished--------------------");
-}
-*/
-/*
-X64_xor_in:
- ld r0, X+
- eor r18, r0
- ld r0, X+
- eor r19, r0
- ld r0, X+
- eor r20, r0
- ld r0, X+
- eor r21, r0
- ld r0, X+
- eor r22, r0
- ld r0, X+
- eor r23, r0
- ld r0, X+
- eor r24, r0
- ld r0, X+
- eor r25, r0
- ret
-
-X64_load:
- ld r18, X+
- ld r19, X+
- ld r20, X+
- ld r21, X+
- ld r22, X+
- ld r23, X+
- ld r24, X+
- ld r25, X+
- ret
-
-Y64_load_xor_store:
- ld r0, Y
- eor r18, r0
- st Y+, r18
- ld r0, Y
- eor r19, r0
- st Y+, r19
- ld r0, Y
- eor r20, r0
- st Y+, r20
- ld r0, Y
- eor r21, r0
- st Y+, r21
- ld r0, Y
- eor r22, r0
- st Y+, r22
- ld r0, Y
- eor r23, r0
- st Y+, r23
- ld r0, Y
- eor r24, r0
- st Y+, r24
- ld r0, Y
- eor r25, r0
- st Y+, r25
- ret
-
-; param s: r24-r25
-; param *k: r22-r23
-//.global camellia128_init
-camellia128_init:
- push r29
- push r28
- movw r30, r24 ; Z is statepointer
- movw r26, r22 ; X is keypointer
- clr r29
- ldi r28, 18
-// / * load key into kl, ka and kal to r18:r25 * /
- adiw r26, 128/8 ;-- 16
- ldi r16, (128/8)-1
-1: ld r17, -X
- std Z+(128/8), r17
- st Z+, r17
- sbrs r16, 3
- st Y+, r17 ; this should only be done the last 8 rounds 0<=r16<=7
- dec r16
- brpl 1b
-// / * step 1 * /
- ldi r26, lo8(camellia_sigma)
- ldi r27, hi8(camellia_sigma)
- rcall X64_xor_in
- rcall camellia_s
- rcall camellia_p // / * f(x,k) is done * /
- sbiw r30, 128/8
- movw r28, r30 ; Z&Y point on kar now
- call Y64_load_xor_store
-
-// / * step 2 now * /
- rcall X64_xor_in
- rcall camellia_s
- rcall camellia_p // / * f(x,k) is done * /
- rcall Y64_load_xor_store
-
-// / * now the xor part (kl and kr) * /
- sbiw r30, 128/8 ; Z points to klr
- ldi r16, 128/8
-1: ld r0, Z+
- ldd r1, Z+(128/8)-1
- eor r0, r1
- std Z+(128/8)-1, r0
- dec r16
- brne 1b
-
-// / * now s->kar ^= camellia_f(s->kal, camellia_sigma[2]); * /
- rcall X64_load ; load sigma[2]
- movw r26, r28 ; X&Y point at kal
- rcall X64_xor_in
- rcall camellia_s
- rcall camellia_p
- sbiw r28, 128/8/2 ; Y points at kar
- rcall Y64_load_xor_store
-
-// / * now s->kal ^= camellia_f(s->kar, camellia_sigma[3]); * /
- sbiw r26, 128/8 ;
- rcall X64_load ; load kar
- ldi r26, lo8(camellia_sigma+3*8)
- ldi r27, hi8(camellia_sigma+3*8)
- rcall X64_xor_in ; xor sigma[3] in
- rcall camellia_s
- rcall camellia_p
- rcall Y64_load_xor_store
-
- pop r28
- pop r29
- ret
-
-//*/
-
-
-
-
-
-
-
-
-
-