X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=noekeon_asm.S;h=7fc05069005d2acb79a2c847609017bffadaea0a;hb=17332291e15183d71d88ed868275e3cb53917180;hp=f3f904dcc21303f2048185f024c0c8034b225e80;hpb=06a565f432ed3f51cbd9d88807b9860474c38938;p=avr-crypto-lib.git diff --git a/noekeon_asm.S b/noekeon_asm.S index f3f904d..7fc0506 100644 --- a/noekeon_asm.S +++ b/noekeon_asm.S @@ -1,3 +1,21 @@ +/* noekeon_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ /* * noekeon assembler implementation for avr * author: Daniel Otte @@ -26,13 +44,9 @@ push r17 push r28 push r29 - in r28, _SFR_IO_ADDR(SREG) - push r28 .endm .macro pop_all - pop r28 - out _SFR_IO_ADDR(SREG), r28 pop r29 pop r28 pop r17 @@ -54,6 +68,18 @@ clr r1 .endm +push_all_func: ; out-of-line copy of the push_all macro, entered with rcall + pop r31 ; take the return address that rcall left on the stack into Z (r31:r30) + pop r30 ; NOTE(review): two pops assume a 2-byte return address, confirm for parts with a 3-byte program counter + push_all + ijmp ; go back to the caller through Z, the saved registers stay on the stack + +pop_all_func: ; out-of-line copy of the pop_all macro, entered with rcall + pop r31 ; move the return address into Z first so that pop_all finds the saved registers on top of the stack + pop r30 + pop_all + ijmp ; go back to the caller through Z + .macro xchg a b eor \a, \b eor \b, \a @@ -83,7 +109,6 @@ \op \p\()\d, \q\()\z .endm -.global bigendian_rotl32 ; === bigendian_rotl32 === ; this function rotates a 32bit bigendian word n bits to the left ; param1: the 32-bit value @@ -94,7 +119,6 @@ ; given in r25,r24,r23,r22 bigendian_rotl32: - in r0, _SFR_IO_ADDR(SREG) /* copy high bit of r22 to carry */ mov r1, r22 2: @@ -109,13 +133,11 @@ bigendian_rotl32: brne 2b bigendian_rotl32_exit: clr r1 - out _SFR_IO_ADDR(SREG), r0 ret /******************************************************************************/ -.global bigendian_rotr32 ; === bigendian_rotr32 === ; this function rotates a 32bit bigendian word n bits to the right ; param1: the 32-bit value @@ -126,7 +148,6 @@ bigendian_rotl32_exit: ; given in r25,r24,r23,r22 bigendian_rotr32: - in r0, _SFR_IO_ADDR(SREG) /* copy high bit of r25 to carry */ mov r1, r25 @@ -141,7 +162,6 @@ bigendian_rotr32: brne 2b bigendian_rotr32_exit: clr r1 - out _SFR_IO_ADDR(SREG), r0 ret /******************************************************************************/ @@ -267,14 +287,15 @@ theta: ret /******************************************************************************/ +#ifndef NOEKEON_NO_ENC /* define NOEKEON_NO_ENC to leave noekeon_enc out of the build; note that noekeon_init calls noekeon_enc */ ; === noekeon_enc === ; -; param1: pointer to buffer/state (r24,r25) +; param1: pointer to buffer (r24,r25) ; param2: pointer to k (r22,r23) ; .global noekeon_enc noekeon_enc: - push_all + rcall push_all_func /* load state */ movw r26, r22 ldi r28, 2 @@ -304,7 +325,7 @@ noekeon_enc: lpm r0, Z push r0 3: - call round /* pops rc2 & rc1 */ + rcall round /* pops rc2 & rc1 */ pop r22 dec r22 push r22 @@ -314,7 
+335,7 @@ noekeon_enc: ldi r22, 0xD4 eor state0_3, r22 - call theta + rcall theta pop r31 pop r30 @@ -327,10 +348,13 @@ noekeon_enc: dec r22 brne 1b - pop_all + rcall pop_all_func ret +#endif /******************************************************************************/ /******************************************************************************/ +#ifndef NOEKEON_NO_DEC + ; === noekeon_dec === ; ; param1: pointer to buffer/state (r24,r25) @@ -338,7 +362,7 @@ noekeon_enc: ; .global noekeon_dec noekeon_dec: - push_all + rcall push_all_func /* allocate 16 bytes on the stack */ in r30, _SFR_IO_ADDR(SPL) in r31, _SFR_IO_ADDR(SPH) @@ -369,7 +393,7 @@ noekeon_dec: movw r26, r30 sbiw r26, 16 /* set X back to beginning of stack key */ - call theta + rcall theta /* mov state to stackkey */ clr r29 @@ -411,7 +435,7 @@ noekeon_dec: push r0 push r1 3: - call round /* pops rc2 & rc1 */ + rcall round /* pops rc2 & rc1 */ pop r22 dec r22 push r22 @@ -419,7 +443,7 @@ noekeon_dec: ;---- pop r22 - call theta + rcall theta ldi r22, 0x80 eor state0_3, r22 @@ -442,25 +466,28 @@ write_state_back: adiw r30, 16 out _SFR_IO_ADDR(SPH), r31 out _SFR_IO_ADDR(SPL), r30 /* NOTE(review): SPH and SPL are written separately and no cli is visible in this hunk; if interrupts can be enabled here, one arriving between the two writes would push onto a half-updated stack pointer */ - pop_all + rcall pop_all_func ret +#endif /******************************************************************************/ + round: pop r24 pop r25 pop r1 eor state0_3, r1 - call theta + rcall theta pop r1 eor state0_3, r1 push r25 push r24 pi_gamma_pi: - clc - call pi + ldi r30, pm_lo8(bigendian_rotl32) /* Z = word address of the rotate-left routine; this replaces the carry flag that used to tell pi which rotate to pick */ + ldi r31, pm_hi8(bigendian_rotl32) + rcall pi /* pi1 done; now gamma */ - call gamma_1 + rcall gamma_1 /* a[0] <-> a[3] */ xchg state0_0, state3_0 xchg state0_1, state3_1 @@ -470,23 +497,11 @@ pi_gamma_pi: op32 eor, state2, state0 op32 eor, state2, state1 op32 eor, state2, state3 -/* - eor state2_0, state0_0 - eor state2_1, state0_1 - eor state2_2, state0_2 - eor state2_3, state0_3 - eor state2_0, state1_0 - eor state2_1, state1_1 - eor state2_2, state1_2 - eor state2_3, state1_3 - eor state2_0, state3_0 - eor state2_1, 
state3_1 - eor state2_2, state3_2 - eor state2_3, state3_3 -*/ - call gamma_1 - sec - call pi + + rcall gamma_1 + ldi r30, pm_lo8(bigendian_rotr32) /* Z = word address of the rotate-right routine, handed to pi in place of the old carry-flag selector */ + ldi r31, pm_hi8(bigendian_rotr32) + rcall pi ret gamma_1: @@ -529,17 +544,7 @@ gamma_1: eor state0_3, r1 ret -pi: - brcs 1f - ldi r30, lo8(bigendian_rotl32) - ldi r31, hi8(bigendian_rotl32) - rjmp 2f -1: - ldi r30, lo8(bigendian_rotr32) - ldi r31, hi8(bigendian_rotr32) -2: - lsr r31 - ror r30 +pi: /* callers now preload Z (r31:r30) with the word address of the rotate routine; pm_lo8/pm_hi8 at the call sites replace the run-time lsr/ror halving removed here */ /* a[1] <<<= 1*/ mov r22, state1_0 mov r23, state1_1 @@ -574,61 +579,69 @@ pi: mov state3_2, r24 mov state3_3, r25 ret + +/******************************************************************************/ -;------- trash follows -------- +/* +void noekeon_init(void* key, noekeon_ctx_t* ctx){ + uint8_t nullv[16]; + + memset(nullv, 0, 16); + memcpy(ctx, key, 16); + noekeon_enc(ctx, nullv); +} +*/ +#ifndef NOEKEON_NO_INIT +.global noekeon_init +noekeon_init: +; === noekeon_init === +; copies the 16-byte key into the context, then runs noekeon_enc on the context with a 16-byte all-zero block as second argument +; param1: pointer to key (r24,r25) +; param2: pointer to context (r22,r23) +; uses 16 bytes of stack for the zero block; clobbers r0, r20, r22 to r27, r30, r31 in addition to whatever noekeon_enc clobbers + in r30, _SFR_IO_ADDR(SPL) + in r31, _SFR_IO_ADDR(SPH) + sbiw r30, 16 /* reserve 16 bytes of stack for nullv */ + in r0, _SFR_IO_ADDR(SREG) /* remember the caller's interrupt state */ + cli /* SPH and SPL cannot be written in one instruction; keep interrupts out until both halves are updated */ + out _SFR_IO_ADDR(SPH), r31 + out _SFR_IO_ADDR(SREG), r0 /* restore the interrupt state; the following instruction still executes before a pending interrupt is taken */ + out _SFR_IO_ADDR(SPL), r30 - /* load state */ movw r26, r22 - ldi r28, 2 - clr r29 /* Y points at r2 aka state0_0 */ - ldi r22, 16 -1: /* copy key to state */ - ld r0, X+ - st Y+, r0 - dec r22 + adiw r30, 1 /* SP points one below the reserved area, so nullv starts at SP+1 */ + movw r22, r30 /* second argument for noekeon_enc: pointer to nullv */ + /* set nullv(stack) to zero */ + ldi r20, 16 +1: st Z+, r1 /* relies on r1 == 0 on entry (avr-gcc zero register) */ + dec r20 brne 1b - movw r26, r30 - - clr r1 - ldi r22, 16 -1: /* set key to zero */ - st Z+, r1 - dec r22 - brne 1b - - call theta - - ldi r22, 16 -1: /* write key back */ - ld r0, -Y - st -Z, r0 - dec r22 + /* copy key data to ctx (X still holds the ctx pointer taken from r23:r22 above) */ + movw r30, r24 + ldi r20, 16 +1: ld r1, Z+ /* r1 doubles as the copy scratch register inside this loop */ + st X+, r1 + dec r20 brne 1b - -; movw r26, r30 /* move keypointer to X */ -; adiw r26, 1 - movw r30, r24 /* Z points at state */ - push r30 /* push state pointer */ - push r31 + clr r1 /* r1 was used as scratch; restore the zero register */ - ;-- - clr r29 - ldi r28, 2 - ;-- - ldi r22, 16 - push r22 /* 16 is also the number of rounds and gets pushed here */ - ldi r22, 16 
-1: /* load state */ - ld r0, Z+ - st Y+, r0 - dec r22 - brne 1b - /* state loaded */ - - -;------- ------------- -------- + sbiw r26, 16 /* rewind X to the start of ctx */ + movw r24, r26 /* first argument for noekeon_enc: ctx */ + rcall noekeon_enc /* NOTE(review): noekeon_enc is guarded by NOEKEON_NO_ENC, so building with NOEKEON_NO_ENC but without NOEKEON_NO_INIT leaves this call without a target in this file */ + + in r30, _SFR_IO_ADDR(SPL) + in r31, _SFR_IO_ADDR(SPH) + adiw r30, 16 /* release nullv */ + in r0, _SFR_IO_ADDR(SREG) /* same interrupt-safe SP update as on entry */ + cli + out _SFR_IO_ADDR(SPH), r31 + out _SFR_IO_ADDR(SREG), r0 + out _SFR_IO_ADDR(SPL), r30 + ret + +#endif