X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=skein%2Fthreefish1024_enc_asm.S;h=e77d0151778b6b8c6694963f534506ce1d8b1e88;hb=b8aa8d06134ce687993cb3148d66ebd86dbf1d6f;hp=38bf04c587320b8dcb03bd2a6dde00ef9a23d356;hpb=4f50c75ee5a6cc88bf7ea71957ed509e298e6c25;p=avr-crypto-lib.git diff --git a/skein/threefish1024_enc_asm.S b/skein/threefish1024_enc_asm.S index 38bf04c..e77d015 100644 --- a/skein/threefish1024_enc_asm.S +++ b/skein/threefish1024_enc_asm.S @@ -1,7 +1,7 @@ /* threefish1024_enc_asm.S */ /* This file is part of the AVR-Crypto-Lib. - Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,10 +18,10 @@ */ /* * \author Daniel Otte - * \email daniel.otte@rub.de + * \email bg@nerilex.org * \date 2009-03-24 * \license GPLv3 or later - */ + */ #include "avr-asm-macros.S" @@ -39,7 +39,7 @@ A7 = 21 #define K(s) (((uint64_t*)key)[(s)]) #define T(s) (((uint64_t*)tweak)[(s)]) -void threefish1024_init(const void* key, const void* tweak, threefish512_ctx_t* ctx){ +void threefish1024_init(const void *key, const void *tweak, threefish512_ctx_t *ctx){ memcpy(ctx->k, key, 16*8); memcpy(ctx->t, tweak, 2*8); uint8_t i; @@ -61,11 +61,20 @@ threefish1024_init: movw r30, r20 movw r26, r24 ldi r24, 16 - ldi A7, 0x55 - mov A6, A7 - movw A4, A6 - movw A2, A6 +; ldi A7, 0x55 +; mov A6, A7 +; movw A4, A6 +; movw A2, A6 +; movw A0, A6 + ldi A6, 0x22 ; 0x1BD1.1BDA.A9FC.1A22 + ldi A7, 0x1A movw A0, A6 + ldi A2, 0xFC + ldi A3, 0xA9 + ldi A4, 0xDA + ldi A5, 0x1B + ldi A6, 0xD1 + ldi A7, 0x1B 1: ld r0, X+ st Z+, r0 @@ -113,7 +122,7 @@ threefish1024_init: dec r26 brne 1b rjmp 9f -3: +3: ld A0, X+ ld A1, X+ ld A2, X+ @@ -165,11 +174,11 @@ threefish1024_init: 9: pop_range 14, 17 ret - + /******************************************************************************/ /* #define X(a) (((uint64_t*)data)[(a)]) -void permute_16(void* data){ +void permute_16(void *data){ uint64_t t; t = X(1); X(1) = X(9); @@ -190,7 +199,7 @@ void permute_16(void* data){ X(12) = X(14); X(14) = t; } -void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){ +void add_key_16(void *data, const threefish1024_ctx_t *ctx, uint8_t s){ uint8_t i; for(i=0; i<13; ++i){ X(i) += ctx->k[(s+i)%17]; @@ -199,7 +208,7 @@ void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){ X(14) += ctx->k[(s+14)%17] + ctx->t[(s+1)%3]; X(15) += ctx->k[(s+15)%17] + s; } -void threefish1024_enc(void* data, const threefish1024_ctx_t* ctx){ +void threefish1024_enc(void *data, const threefish1024_ctx_t *ctx){ uint8_t i=0,s=0; uint8_t r0[8] = {55, 25, 33, 34, 28, 17, 58, 47}; uint8_t r1[8] = {43, 25, 8, 43, 7, 6, 7, 49}; @@ -208,7 +217,7 @@ void threefish1024_enc(void* data, const threefish1024_ctx_t* ctx){ uint8_t r4[8] = {16, 14, 21, 44, 51, 43, 19, 37}; uint8_t r5[8] = {22, 13, 12, 9, 9, 42, 18, 48}; uint8_t r6[8] = {38, 52, 32, 59, 35, 40, 2, 53}; - uint8_t r7[8] = {12, 57, 54, 34, 41, 15, 56, 56}; + uint8_t r7[8] = {12, 57, 54, 34, 41, 15, 56, 56}; do{ if(i%4==0){ add_key_16(data, ctx, s); @@ -256,7 +265,7 @@ threefish1024_enc: movw CTX0, r22 clr I clr S -1: +1: mov r30, I andi r30, 0x03 breq 2f @@ -405,8 +414,8 @@ exit: pop_range 2, 17 pop r29 pop r28 - ret -4: + ret +4: /* call mix */ ldi r30, lo8(threefish1024_rc0) ldi r31, hi8(threefish1024_rc0) @@ -434,7 +443,7 @@ exit: push IDX4 push IDX3 push IDX2 - + movw r24, DATA0 call threefish_mix_asm /* no rcall? */ movw r24, DATA0 @@ -488,7 +497,7 @@ exit: movw r30, DATA0 adiw r30, 7*8 rcall xchg_zx8 - /* --- */ + /* --- */ movw r26, DATA0 /* X3 <-> X11 */ adiw r26, 3*8 movw r30, DATA0 @@ -535,7 +544,7 @@ exit: movw r30, DATA0 adiw r30, 6*8 rcall xchg_zx8 - + inc I ; mov r26, I ; cpi r26, 5 @@ -543,8 +552,8 @@ exit: ; rjmp exit 9: rjmp 1b - -threefish1024_slut17: + +threefish1024_slut17: .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38 .byte 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 .byte 0x80, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30 @@ -555,14 +564,24 @@ threefish1024_slut3: .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00 .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10 .byte 0x00 -threefish1024_rc0: .byte 0x79, 0x31, 0x41, 0x42, 0x34, 0x21, 0x72, 0x69 -threefish1024_rc1: .byte 0x53, 0x31, 0x10, 0x53, 0x19, 0x1a, 0x19, 0x61 -threefish1024_rc2: .byte 0x5b, 0x6a, 0x22, 0x31, 0x69, 0x22, 0x40, 0x33 -threefish1024_rc3: .byte 0x50, 0x2b, 0x71, 0x74, 0x60, 0x31, 0x6b, 0x72 -threefish1024_rc4: .byte 0x20, 0x2a, 0x3b, 0x54, 0x63, 0x53, 0x23, 0x5b -threefish1024_rc5: .byte 0x3a, 0x2b, 0x14, 0x11, 0x11, 0x52, 0x22, 0x60 -threefish1024_rc6: .byte 0x5a, 0x64, 0x40, 0x73, 0x43, 0x50, 0x02, 0x7b -threefish1024_rc7: .byte 0x14, 0x71, 0x7a, 0x42, 0x51, 0x29, 0x70, 0x70 +/* old round constants +threefish1024_rc0: .byte 0x79, 0x31, 0x41, 0x42, 0x34, 0x21, 0x72, 0x69 +threefish1024_rc1: .byte 0x53, 0x31, 0x10, 0x53, 0x19, 0x1a, 0x19, 0x61 +threefish1024_rc2: .byte 0x5b, 0x6a, 0x22, 0x31, 0x69, 0x22, 0x40, 0x33 +threefish1024_rc3: .byte 0x50, 0x2b, 0x71, 0x74, 0x60, 0x31, 0x6b, 0x72 +threefish1024_rc4: .byte 0x20, 0x2a, 0x3b, 0x54, 0x63, 0x53, 0x23, 0x5b +threefish1024_rc5: .byte 0x3a, 0x2b, 0x14, 0x11, 0x11, 0x52, 0x22, 0x60 +threefish1024_rc6: .byte 0x5a, 0x64, 0x40, 0x73, 0x43, 0x50, 0x02, 0x7b +threefish1024_rc7: .byte 0x14, 0x71, 0x7a, 0x42, 0x51, 0x29, 0x70, 0x70 +*/ +threefish1024_rc0: .byte 0x30, 0x5a, 0x41, 0x1b, 0x51, 0x20, 0x49, 0x11 +threefish1024_rc1: .byte 0x2b, 0x23, 0x04, 0x24, 0x11, 0x42, 0x54, 0x60 +threefish1024_rc2: .byte 0x10, 0x12, 0x63, 0x60, 0x5b, 0x70, 0x69, 0x43 +threefish1024_rc3: .byte 0x69, 0x79, 0x2b, 0x51, 0x49, 0x63, 0x6a, 0x64 +threefish1024_rc4: .byte 0x10, 0x61, 0x42, 0x69, 0x14, 0x04, 0x23, 0x39 +threefish1024_rc5: .byte 0x21, 0x22, 0x51, 0x34, 0x69, 0x7b, 0x52, 0x49 +threefish1024_rc6: .byte 0x3a, 0x39, 0x73, 0x20, 0x54, 0x52, 0x54, 0x5b +threefish1024_rc7: .byte 0x5b, 0x64, 0x21, 0x31, 0x4a, 0x51, 0x31, 0x24 add_z_to_x8: ld r0, Z+ @@ -599,16 +618,16 @@ add_z_to_x8: st X+, r1 clr r1 ret - + T0 = IDX0 T1 = 0 -CNT = 24 +CNT = 24 xchg_zx8: ldi CNT, 8 1: ld T0, X ld T1, Z st X+, T1 - st Z+, T0 + st Z+, T0 dec CNT brne 1b ret