/* noekeon_asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * noekeon assembler implementation for avr
 * author:  Daniel Otte
 * email:   daniel.otte@rub.de
 * license: GPLv3
 */
/*
 * Origin: dsa/noekeon_asm.S of avr-crypto-lib (blob b0a2a16), recovered from
 * a gitweb deletion diff; the diff markers have been removed.
 *
 * Implementation notes
 *  - The 128-bit cipher state lives in r2..r17 for the whole computation
 *    (see the state<word>_<byte> symbols below). Byte 0 of every word is the
 *    first byte in memory.
 *  - Several loops read/write those registers through the Y pointer set to
 *    data address 2 ("Y points to r2"). NOTE(review): this relies on the
 *    register file being visible in the data address space - confirm for
 *    the target device.
 *  - push_all_func, pop_all_func and round pop exactly two bytes as their
 *    return address, i.e. they assume a 2-byte program counter on the stack.
 *  - r1 (the compiler's zero register) is used as scratch and is cleared
 *    again before control returns to C code.
 */

#include <avr/io.h>

/******************************************************************************/
/* helper macros                                                              */
/******************************************************************************/

; Load the stack pointer from Z (r31:r30).
; SPH and SPL cannot be written in one instruction, so interrupts are masked
; while the high byte is written. SREG is restored *before* SPL is written:
; the instruction following a write that re-enables interrupts is executed
; before a pending interrupt is served, so SPL is updated before any
; interrupt can use the stack.
; clobbers: r0
.macro set_sp_from_z
    in      r0, _SFR_IO_ADDR(SREG)
    cli
    out     _SFR_IO_ADDR(SPH), r31
    out     _SFR_IO_ADDR(SREG), r0
    out     _SFR_IO_ADDR(SPL), r30
.endm

; Save r2..r17 (state registers) and r28/r29 (Y): 18 bytes of stack.
.macro push_all
    push    r2
    push    r3
    push    r4
    push    r5
    push    r6
    push    r7
    push    r8
    push    r9
    push    r10
    push    r11
    push    r12
    push    r13
    push    r14
    push    r15
    push    r16
    push    r17
    push    r28
    push    r29
.endm

; Exact reverse of push_all; additionally re-establishes r1 = 0.
.macro pop_all
    pop     r29
    pop     r28
    pop     r17
    pop     r16
    pop     r15
    pop     r14
    pop     r13
    pop     r12
    pop     r11
    pop     r10
    pop     r9
    pop     r8
    pop     r7
    pop     r6
    pop     r5
    pop     r4
    pop     r3
    pop     r2
    clr     r1
.endm

; === push_all_func ===
; Out-of-line version of push_all (saves code size when used more than once).
; The return address is taken off the stack first so that the saved registers
; end up in the caller's frame; the routine returns with ijmp.
; clobbers: r30, r31 (Z)
push_all_func:
    pop     r31
    pop     r30
    push_all
    ijmp

; === pop_all_func ===
; Out-of-line version of pop_all; counterpart of push_all_func.
; clobbers: r30, r31 (Z); leaves r1 = 0
pop_all_func:
    pop     r31
    pop     r30
    pop_all
    ijmp

; Swap two registers without a temporary (triple XOR).
; \a and \b must be different registers.
.macro xchg a b
    eor     \a, \b
    eor     \b, \a
    eor     \a, \b
.endm

; Apply a two-operand instruction byte-wise to two 32-bit words that are
; named <prefix>_0 .. <prefix>_3.
.macro op32 op a b
    \op     \a\()_0, \b\()_0
    \op     \a\()_1, \b\()_1
    \op     \a\()_2, \b\()_2
    \op     \a\()_3, \b\()_3
.endm

; Apply a two-operand instruction to four explicit operand pairs.
.macro op32_4t op a b c d w x y z
    \op     \a, \w
    \op     \b, \x
    \op     \c, \y
    \op     \d, \z
.endm

; Like op32_4t, but every destination name is prefixed with \p and every
; source name with \q.
.macro op32_prefix op p q a b c d w x y z
    \op     \p\()\a, \q\()\w
    \op     \p\()\b, \q\()\x
    \op     \p\()\c, \q\()\y
    \op     \p\()\d, \q\()\z
.endm

/******************************************************************************/

; === bigendian_rotl32 ===
; this function rotates a 32bit bigendian word n bits to the left
;  param1: the 32-bit value
;          given in r25,r24,r23,r22 (r22 is most significant)
;  param2: the 8-bit parameter giving the number of bits to rotate
;          given in r20
;  return: the rotated 32-bit word
;          given in r25,r24,r23,r22
;
; n must be at least 1 (the loop body runs before the counter is tested) and
; at most 8 (r1 only carries eight valid wrap-around bits). The callers in
; this file use 1, 2 and 5.
; clobbers: r20 (left at 0), r1 (cleared on exit), flags

bigendian_rotl32:
    /* r1 shadows the most significant byte; shifting it out yields the bit
       that has to wrap around into the least significant byte */
    mov     r1, r22
2:
    rol     r1              /* carry = current top bit of the word */

    rol     r25
    rol     r24
    rol     r23
    rol     r22

    dec     r20             /* dec leaves the carry flag untouched */
    brne    2b
bigendian_rotl32_exit:
    clr     r1
    ret


/******************************************************************************/

; === bigendian_rotr32 ===
; this function rotates a 32bit bigendian word n bits to the right
;  param1: the 32-bit value
;          given in r25,r24,r23,r22 (r22 is most significant)
;  param2: the 8-bit parameter giving the number of bits to rotate
;          given in r20
;  return: the rotated 32-bit word
;          given in r25,r24,r23,r22
;
; The same restrictions on n as for bigendian_rotl32 apply.
; clobbers: r20 (left at 0), r1 (cleared on exit), flags

bigendian_rotr32:
    /* r1 shadows the least significant byte; shifting it out yields the bit
       that has to wrap around into the most significant byte */
    mov     r1, r25
2:
    ror     r1              /* carry = current bottom bit of the word */

    ror     r22
    ror     r23
    ror     r24
    ror     r25

    dec     r20             /* dec leaves the carry flag untouched */
    brne    2b
bigendian_rotr32_exit:
    clr     r1
    ret

/******************************************************************************/
/*
void theta(uint32_t* k, uint32_t* a){
	uint32_t temp;
	temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
	a[1] ^= temp;
	a[3] ^= temp;

	a[0] ^= k[0];
	a[1] ^= k[1];
	a[2] ^= k[2];
	a[3] ^= k[3];

	temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
	a[0] ^= temp;
	a[2] ^= temp;
}
*/

/* Round constants, read with lpm. The constant 0x80 of the very first
   encryption round (= last decryption step) is not part of the table; it is
   loaded as an immediate by noekeon_enc / noekeon_dec. */
round_const:
    .byte   0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, 0x2F
    .byte   0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, 0xD4

/* register allocation of the state: state<word>_<byte> */
;-- a[0]
state0_0 =  2
state0_1 =  3
state0_2 =  4
state0_3 =  5
;-- a[1]
state1_0 =  6
state1_1 =  7
state1_2 =  8
state1_3 =  9
;-- a[2]
state2_0 = 10
state2_1 = 11
state2_2 = 12
state2_3 = 13
;-- a[3]
state3_0 = 14
state3_1 = 15
state3_2 = 16
state3_3 = 17

; === theta ===
;
;  param1: the state in r2-r17
;  param2: pointer to k in X (r26,r27)
;
; X is preserved (it is advanced by 16 and moved back).
; clobbers: r0, r18-r21, r28, r29 (Y), flags; r1 is cleared on exit
;
temp_a = 18
temp_b = 19
temp_c = 20
temp_d = 21

theta:
    /* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
    op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
    op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3

    /* r1 = XOR of all four bytes; XORing it into a byte leaves the XOR of
       the three *other* bytes, which is what the byte rotations produce -
       only at a byte position shifted by two */
    mov     r1, temp_a
    eor     r1, temp_b
    eor     r1, temp_c
    eor     r1, temp_d

    op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1

    /* temp is now a little bit mixed: c,d,a,b (if a,b,c,d is normal order) */
    /* a[1] ^= temp */
    eor     state1_0, temp_c
    eor     state1_1, temp_d
    eor     state1_2, temp_a
    eor     state1_3, temp_b
    /* a[3] ^= temp */
    eor     state3_0, temp_c
    eor     state3_1, temp_d
    eor     state3_2, temp_a
    eor     state3_3, temp_b

    /* state ^= k (X points to k) */
    ldi     r28, state0_0
    clr     r29             /* Y points to r2 aka state0_0 */
    ldi     temp_a, 16
1:
    ld      r1, X+
    ld      r0, Y
    eor     r1, r0
    st      Y+, r1
    dec     temp_a
    brne    1b
    sbiw    r26, 16         /* set X back to key */

    /* temp = a[1] ^ a[3]; temp ^= temp>>>8 ^ temp<<<8 */
    op32_prefix mov, temp_, state1_, a,b,c,d, 0,1,2,3
    op32_prefix eor, temp_, state3_, a,b,c,d, 0,1,2,3

    mov     r1, temp_a
    eor     r1, temp_b
    eor     r1, temp_c
    eor     r1, temp_d

    op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1

    /* temp is now a little bit mixed: c,d,a,b (if a,b,c,d is normal order) */
    /* a[0] ^= temp */
    eor     state0_0, temp_c
    eor     state0_1, temp_d
    eor     state0_2, temp_a
    eor     state0_3, temp_b
    /* a[2] ^= temp */
    eor     state2_0, temp_c
    eor     state2_1, temp_d
    eor     state2_2, temp_a
    eor     state2_3, temp_b

    clr     r1
    ret

/******************************************************************************/
#ifndef NOEKEON_NO_ENC
; === noekeon_enc ===
;
;  param1: pointer to buffer (r24,r25); 16 bytes, encrypted in place
;  param2: pointer to k (r22,r23); 16 bytes, only read
;
; Stack layout while the rounds are running (top of stack last):
;   buffer pointer (2 bytes), round counter (1 byte)
; plus, for every call of round, the two round constants (see round).
;
.global noekeon_enc
noekeon_enc:
    rcall   push_all_func
    /* load state */
    movw    r26, r22        /* X points at the key (kept there by theta) */
    ldi     r28, state0_0
    clr     r29             /* Y points at r2 aka state0_0 */
    movw    r30, r24        /* Z points at the buffer */
    push    r30
    push    r31
    ldi     r22, 16
    push    r22             /* 16 is also the number of rounds and gets pushed here */
1:
    ld      r0, Z+
    st      Y+, r0
    dec     r22
    brne    1b
    /* state loaded */
    push    r1              /* push round constant 2 (0x00) */
    ldi     r20, 0x80
    push    r20             /* push round constant 1 (0x80) */
    rjmp    3f
2:
    /* r22 = rounds left (15..1): fetch round_const[15 - r22] */
    ldi     r30, lo8(round_const+15)
    ldi     r31, hi8(round_const+15)
    sub     r30, r22
    sbci    r31, 0
    clr     r1
    push    r1              /* push round constant 2 (0x00) */
    lpm     r0, Z
    push    r0              /* push round constant 1 (from the table) */
3:
    rcall   round           /* pops rc2 & rc1 */
    pop     r22
    dec     r22
    push    r22
    brne    2b

    pop     r22             /* drop the round counter */

    /* final step: a[0] ^= 0xD4 (low byte); theta */
    ldi     r22, 0xD4
    eor     state0_3, r22
    rcall   theta

    /* write state back to the buffer */
    pop     r31
    pop     r30
    clr     r29
    ldi     r28, state0_0
    ldi     r22, 16
1:
    ld      r0, Y+
    st      Z+, r0
    dec     r22
    brne    1b

    rcall   pop_all_func
    ret
#endif
/******************************************************************************/
/******************************************************************************/
#ifndef NOEKEON_NO_DEC

; === noekeon_dec ===
;
;  param1: pointer to buffer/state (r24,r25); 16 bytes, decrypted in place
;  param2: pointer to k (r22,r23); 16 bytes, only read
;
; A 16 byte scratch buffer is allocated on the stack. It first serves as the
; all-zero key for theta(0, k) and afterwards holds the result, which is the
; key used by all decryption rounds.
;
.global noekeon_dec
noekeon_dec:
    rcall   push_all_func
    /* allocate 16 bytes on the stack */
    in      r30, _SFR_IO_ADDR(SPL)
    in      r31, _SFR_IO_ADDR(SPH)
    sbiw    r30, 16
    set_sp_from_z           /* interrupt-safe SP update, clobbers r0 */

    adiw    r30, 1          /* Z points at the first byte of the stack key */
    /* push state pointer */
    push    r24
    push    r25
    movw    r26, r22        /* move key ptr to X */

    /* set stackkey to zero */
    ldi     r22, 16
1:  st      Z+, r1
    dec     r22
    brne    1b

    /* copy key to state */
    clr     r29
    ldi     r28, state0_0
    ldi     r22, 16
1:  ld      r0, X+
    st      Y+, r0
    dec     r22
    brne    1b

    movw    r26, r30
    sbiw    r26, 16         /* set X back to beginning of stack key */
    rcall   theta           /* state = theta(0, k) */

    /* mov state to stackkey */
    clr     r29
    ldi     r28, state0_0
    ldi     r22, 16
1:  ld      r0, Y+
    st      X+, r0
    dec     r22
    brne    1b
    sbiw    r26, 16         /* set X back to beginning of stack key */

    /* move data from stateptr to state */
    pop     r31
    pop     r30
    push    r30             /* keep the state pointer for the write back */
    push    r31
    clr     r29
    ldi     r28, state0_0
    ldi     r22, 16
    push    r22             /* round counter (16) */
1:  ld      r0, Z+
    st      Y+, r0
    dec     r22
    brne    1b

;--- snip 8< ----

    ldi     r20, 0xD4
    push    r20             /* push round constant 2 (0xD4) */
    push    r22             /* push round constant 1 (0x00, r22 is 0 here) */
    rjmp    3f
2:
    /* r22 = rounds left (15..1): fetch round_const[r22 - 1] */
    ldi     r30, lo8(round_const-1)
    ldi     r31, hi8(round_const-1)
    clr     r1
    add     r30, r22
    adc     r31, r1
    lpm     r0, Z
    push    r0              /* push round constant 2 (from the table) */
    push    r1              /* push round constant 1 (0x00) */
3:
    rcall   round           /* pops rc2 & rc1 */
    pop     r22
    dec     r22
    push    r22
    brne    2b
;----
    pop     r22             /* drop the round counter */

    /* final step: theta; a[0] ^= 0x80 (low byte) */
    rcall   theta
    ldi     r22, 0x80
    eor     state0_3, r22

write_state_back:
    /* write state back */
    pop     r31             /* pop state pointer */
    pop     r30
    clr     r29
    ldi     r28, state0_0
    ldi     r22, 16
1:
    ld      r0, Y+
    st      Z+, r0
    dec     r22
    brne    1b

    /* remove key from stack */
    in      r30, _SFR_IO_ADDR(SPL)
    in      r31, _SFR_IO_ADDR(SPH)
    adiw    r30, 16
    set_sp_from_z           /* interrupt-safe SP update, clobbers r0 */
    rcall   pop_all_func
    ret
#endif
/******************************************************************************/

; === round ===
; One cipher round on the state in r2-r17:
;   a[0] ^= rc1 (low byte); theta; a[0] ^= rc2 (low byte); pi1; gamma; pi2
;
;  param1: the state in r2-r17
;  param2: pointer to k in X (r26,r27)
;  param3: two round constants on the stack; the caller pushes rc2 first and
;          rc1 last, directly before the call. round removes both bytes, so
;          the caller must not pop them again.
;
; clobbers: r0, r1 (not cleared on return), r18-r25, r28-r31, flags
;
round:
    pop     r24             /* take the return address off the stack ... */
    pop     r25
    pop     r1              /* ... to get at rc1 */
    eor     state0_3, r1
    rcall   theta
    pop     r1              /* rc2 */
    eor     state0_3, r1
    push    r25             /* put the return address back */
    push    r24
pi_gamma_pi:
    ldi     r30, pm_lo8(bigendian_rotl32)
    ldi     r31, pm_hi8(bigendian_rotl32)
    rcall   pi
    /* pi1 done; now gamma */
    rcall   gamma_1
    /* a[0] <-> a[3] */
    xchg    state0_0, state3_0
    xchg    state0_1, state3_1
    xchg    state0_2, state3_2
    xchg    state0_3, state3_3
    /* a[2] ^= a[0] ^ a[1] ^ a[3] */
    op32    eor, state2, state0
    op32    eor, state2, state1
    op32    eor, state2, state3

    rcall   gamma_1
    ldi     r30, pm_lo8(bigendian_rotr32)
    ldi     r31, pm_hi8(bigendian_rotr32)
    rcall   pi
    ret

; === gamma_1 ===
; The non-linear step that gamma applies twice:
;   a[1] ^= ~(a[3] | a[2]);  a[0] ^= a[2] & a[1]
; Operates on the state in r2-r17.
; clobbers: r1 (not cleared on return), flags
gamma_1:
    /* a[1] ^= ~(a[3]|a[2])*/
    mov     r1, state3_0
    or      r1, state2_0
    com     r1
    eor     state1_0, r1

    mov     r1, state3_1
    or      r1, state2_1
    com     r1
    eor     state1_1, r1

    mov     r1, state3_2
    or      r1, state2_2
    com     r1
    eor     state1_2, r1

    mov     r1, state3_3
    or      r1, state2_3
    com     r1
    eor     state1_3, r1

    /* a[0] ^= a[2]&a[1] */
    mov     r1, state2_0
    and     r1, state1_0
    eor     state0_0, r1

    mov     r1, state2_1
    and     r1, state1_1
    eor     state0_1, r1

    mov     r1, state2_2
    and     r1, state1_2
    eor     state0_2, r1

    mov     r1, state2_3
    and     r1, state1_3
    eor     state0_3, r1
    ret

; === pi ===
; Rotates a[1] by 1, a[2] by 5 and a[3] by 2 bits.
;
;  param1: the state in r2-r17
;  param2: word address of the rotate routine in Z (r30,r31):
;          bigendian_rotl32 gives pi1, bigendian_rotr32 gives pi2
;
; clobbers: r20, r22-r25, flags; r1 is 0 on return (cleared by the rotate
;           routine)
pi:
    /* a[1] <<<= 1*/
    mov     r22, state1_0
    mov     r23, state1_1
    mov     r24, state1_2
    mov     r25, state1_3
    ldi     r20, 1
    icall
    mov     state1_0, r22
    mov     state1_1, r23
    mov     state1_2, r24
    mov     state1_3, r25
    /* a[2] <<<= 5*/
    mov     r22, state2_0
    mov     r23, state2_1
    mov     r24, state2_2
    mov     r25, state2_3
    ldi     r20, 5
    icall
    mov     state2_0, r22
    mov     state2_1, r23
    mov     state2_2, r24
    mov     state2_3, r25
    /* a[3] <<<= 2*/
    mov     r22, state3_0
    mov     r23, state3_1
    mov     r24, state3_2
    mov     r25, state3_3
    ldi     r20, 2
    icall
    mov     state3_0, r22
    mov     state3_1, r23
    mov     state3_2, r24
    mov     state3_3, r25
    ret

/******************************************************************************/

/*
void noekeon_init(void* key, noekeon_ctx_t* ctx){
	uint8_t nullv[16];

	memset(nullv, 0, 16);
	memcpy(ctx, key, 16);
	noekeon_enc(ctx, nullv);
}
*/

#ifndef NOEKEON_NO_INIT

; === noekeon_init ===
;
;  param1: pointer to key (r24,r25); 16 bytes, only read
;  param2: pointer to context (r22,r23); 16 bytes, receives the key
;          encrypted under the all-zero key
;
.global noekeon_init
noekeon_init:
    /* allocate 16 bytes (nullv) on the stack */
    in      r30, _SFR_IO_ADDR(SPL)
    in      r31, _SFR_IO_ADDR(SPH)
    sbiw    r30, 16
    set_sp_from_z           /* interrupt-safe SP update, clobbers r0 */

    movw    r26, r22        /* X points at ctx */
    adiw    r30, 1          /* Z points at nullv */
    movw    r22, r30        /* nullv is the key argument of noekeon_enc */
    /* set nullv(stack) to zero */
    ldi     r20, 16
1:  st      Z+, r1
    dec     r20
    brne    1b

    /* copy key data to ctx */
    movw    r30, r24
    ldi     r20, 16
1:  ld      r1, Z+          /* r1 serves as scratch here ... */
    st      X+, r1
    dec     r20
    brne    1b
    clr     r1              /* ... and is zeroed again before the call */

    sbiw    r26, 16
    movw    r24, r26        /* ctx is the buffer argument of noekeon_enc */
    rcall   noekeon_enc

    /* release nullv */
    in      r30, _SFR_IO_ADDR(SPL)
    in      r31, _SFR_IO_ADDR(SPH)
    adiw    r30, 16
    set_sp_from_z           /* interrupt-safe SP update, clobbers r0 */
    ret

#endif