X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=noekeon_asm.S;h=8ebcc93c6578ce3b13698cf4e9bff2cf084c7278;hb=96ebafd201c9e8441c7677577b24aa402c1defc6;hp=f3f904dcc21303f2048185f024c0c8034b225e80;hpb=06a565f432ed3f51cbd9d88807b9860474c38938;p=avr-crypto-lib.git diff --git a/noekeon_asm.S b/noekeon_asm.S index f3f904d..8ebcc93 100644 --- a/noekeon_asm.S +++ b/noekeon_asm.S @@ -1,3 +1,21 @@ +/* noekeon_asm.S */ +/* + This file is part of the Crypto-avr-lib/microcrypt-lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ /* * noekeon assembler implementation for avr * author: Daniel Otte @@ -26,13 +44,9 @@ push r17 push r28 push r29 - in r28, _SFR_IO_ADDR(SREG) - push r28 .endm .macro pop_all - pop r28 - out _SFR_IO_ADDR(SREG), r28 pop r29 pop r28 pop r17 @@ -83,7 +97,6 @@ \op \p\()\d, \q\()\z .endm -.global bigendian_rotl32 ; === bigendian_rotl32 === ; this function rotates a 32bit bigendian word n bits to the left ; param1: the 32-bit value @@ -94,7 +107,6 @@ ; given in r25,r24,r23,r22 bigendian_rotl32: - in r0, _SFR_IO_ADDR(SREG) /* copy high bit of r22 to carry */ mov r1, r22 2: @@ -109,13 +121,11 @@ bigendian_rotl32: brne 2b bigendian_rotl32_exit: clr r1 - out _SFR_IO_ADDR(SREG), r0 ret /******************************************************************************/ -.global bigendian_rotr32 ; === bigendian_rotl32 === ; this function rotates a 32bit bigendian word n bits to the right ; param1: the 32-bit value @@ -126,7 +136,6 @@ bigendian_rotl32_exit: ; given in r25,r24,r23,r22 bigendian_rotr32: - in r0, _SFR_IO_ADDR(SREG) /* copy high bit of r25 to carry */ mov r1, r25 @@ -141,7 +150,6 @@ bigendian_rotr32: brne 2b bigendian_rotr32_exit: clr r1 - out _SFR_IO_ADDR(SREG), r0 ret /******************************************************************************/ @@ -457,7 +465,8 @@ round: push r25 push r24 pi_gamma_pi: - clc + ldi r30, pm_lo8(bigendian_rotl32) + ldi r31, pm_hi8(bigendian_rotl32) call pi /* pi1 done; now gamma */ call gamma_1 @@ -470,22 +479,10 @@ pi_gamma_pi: op32 eor, state2, state0 op32 eor, state2, state1 op32 eor, state2, state3 -/* - eor state2_0, state0_0 - eor state2_1, state0_1 - eor state2_2, state0_2 - eor state2_3, state0_3 - eor state2_0, state1_0 - eor state2_1, state1_1 - eor state2_2, state1_2 - eor state2_3, state1_3 - eor state2_0, state3_0 - eor state2_1, state3_1 - eor state2_2, state3_2 - eor state2_3, state3_3 -*/ + call gamma_1 - sec + ldi r30, pm_lo8(bigendian_rotr32) + ldi r31, pm_hi8(bigendian_rotr32) call pi ret @@ -529,17 +526,7 @@ 
gamma_1: eor state0_3, r1 ret -pi: - brcs 1f - ldi r30, lo8(bigendian_rotl32) - ldi r31, hi8(bigendian_rotl32) - rjmp 2f -1: - ldi r30, lo8(bigendian_rotr32) - ldi r31, hi8(bigendian_rotr32) -2: - lsr r31 - ror r30 +pi: /* a[1] <<<= 1*/ mov r22, state1_0 mov r23, state1_1 @@ -574,61 +561,60 @@ pi: mov state3_2, r24 mov state3_3, r25 ret - -;------- trash follows -------- - +/******************************************************************************/ + +/* +void noekeon_init(void* key, noekeon_ctx_t* ctx){ + uint8_t nullv[16]; + + memset(nullv, 0, 16); + memcpy(ctx, key, 16); + noekeon_enc(ctx, nullv); +} +*/ +.global noekeon_init +noekeon_init: /* Copies the 16-byte key into ctx, then calls noekeon_enc on ctx with a zero-filled 16-byte stack buffer (nullv), mirroring the C sketch. */ +; === noekeon_init === +; +; param1: pointer to key (r24,r25) +; param2: pointer to context (r22,r23) +; + in r30, _SFR_IO_ADDR(SPL) + in r31, _SFR_IO_ADDR(SPH) + sbiw r30, 16 /* Z = SP - 16: make room on the stack for the 16-byte nullv buffer */ + out _SFR_IO_ADDR(SPH), r31 + out _SFR_IO_ADDR(SPL), r30 /* NOTE(review): this SPH/SPL write pair and the matching pair before ret run with interrupts left as they are; confirm no ISR can fire between the two writes, or guard them with an SREG save / cli / restore */ - /* load state */ movw r26, r22 - ldi r28, 2 - clr r29 /* Y points at r2 aka state0_0 */ - ldi r22, 16 -1: /* copy key to state */ - ld r0, X+ - st Y+, r0 - dec r22 - brne 1b - - movw r26, r30 - - clr r1 - ldi r22, 16 -1: /* set key to zero */ - st Z+, r1 - dec r22 + adiw r30, 1 /* Z = new SP + 1, the address where the zero fill below starts */ + movw r22, r30 /* r23:r22 = address of nullv, passed on to noekeon_enc (second argument in the C sketch) */ + /* set nullv(stack) to zero */ + ldi r20, 16 +1: st Z+, r1 /* stores r1 as the zero byte, so r1 must be 0 on entry - presumably the avr-gcc zero register, TODO confirm */ + dec r20 brne 1b - - call theta - ldi r22, 16 -1: /* write key back */ - ld r0, -Y - st -Z, r0 - dec r22 + /* copy key data to ctx */ + movw r30, r24 + ldi r20, 16 +1: ld r1, Z+ /* r1 serves as the copy temporary here and is cleared again right after the loop */ + st X+, r1 + dec r20 brne 1b - -; movw r26, r30 /* move keypointer to X */ -; adiw r26, 1 - movw r30, r24 /* Z points at state */ - push r30 /* push state pointer */ - push r31 + clr r1 + + sbiw r26, 16 /* X advanced by 16 during the copy; step it back to the start of ctx */ + movw r24, r26 /* r25:r24 = ctx, first argument for noekeon_enc (per the C sketch) */ + call noekeon_enc + + in r30, _SFR_IO_ADDR(SPL) + in r31, _SFR_IO_ADDR(SPH) + adiw r30, 16 /* give the 16 bytes reserved for nullv back before returning */ + out _SFR_IO_ADDR(SPH), r31 + out _SFR_IO_ADDR(SPL), r30 + ret - ;-- - clr r29 - ldi r28, 2 - ;-- - ldi r22, 16 - push r22 /* 16 is also the number of rounds and gets pushed here */ - ldi r22, 16 -1: /* load state */ - ld r0, Z+ - st Y+, r0 - dec r22 - brne 
1b - /* state loaded */ - -;------- ------------- --------