X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=skein%2Fskein1024_asm.S;fp=skein%2Fskein1024_asm.S;h=8983f6a2e539381b61bceeb93cce23cd499837e5;hp=0000000000000000000000000000000000000000;hb=4f50c75ee5a6cc88bf7ea71957ed509e298e6c25;hpb=7701e318e4e2bac7f84dbf6e368f1501814948fc diff --git a/skein/skein1024_asm.S b/skein/skein1024_asm.S new file mode 100644 index 0000000..8983f6a --- /dev/null +++ b/skein/skein1024_asm.S @@ -0,0 +1,358 @@ +/* skein1024_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-25 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +/* +void skein1024_init(skein1024_ctx_t* ctx, uint16_t outsize_b){ + skein_config_t conf; + uint8_t null[UBI1024_BLOCKSIZE_B]; + memset(null, 0, UBI1024_BLOCKSIZE_B); + memset(&conf, 0, sizeof(skein_config_t)); + conf.schema[0] = 'S'; + conf.schema[1] = 'H'; + conf.schema[2] = 'A'; + conf.schema[3] = '3'; + conf.version = 1; + conf.out_length = outsize_b; + ctx->outsize_b = outsize_b; + ubi1024_init(&(ctx->ubictx), null, UBI_TYPE_CFG); + ubi1024_lastBlock(&(ctx->ubictx), &conf, 256); + ubi1024_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_MSG); +} +*/ +/* + * param ctx: r24:r25 + * param outsize_b: r22:r23 + */ +UBICTX0 = 2 +UBICTX1 = 3 +CONF0 = 4 +CONF1 = 5 +.global skein1024_init +skein1024_init: + push_range 2, 5 + stack_alloc_large 32+128-22 ; |<- 22 ->| + adiw r30, 1 ; | CONF (32) | + movw CONF0, r30 ; | null (128) | + movw r26, r24 + st X+, r22 + st X+, r23 + movw UBICTX0, r26 + ldi r24, 'S' + st Z+, r24 + ldi r24, 'H' + st Z+, r24 + ldi r24, 'A' + st Z+, r24 + ldi r24, '3' + st Z+, r24 + ldi r24, 1 + st Z+, r24 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r22 + st Z+, r23 + ldi 24, 128 +1: st Z+, r1 + dec r24 + brne 1b + /* call ubi1024_init*/ + subi r30, lo8(128) + sbci r31, hi8(128) + movw r24, UBICTX0 + movw r22, r30 + ldi r20, 4 + rcall ubi1024_init + /* call ubi1024_lastBlock*/ + movw r24, UBICTX0 + movw r22, CONF0 + ldi r21, 1 + clr r20 + rcall ubi1024_lastBlock + /* call ubi1024_init*/ + movw r24, UBICTX0 + adiw r24, 16 + movw r22, r24 + movw r24, UBICTX0 + ldi r20, 48 + rcall ubi1024_init + stack_free_large2 32+128-22 + pop_range 2, 5 + ret + +/******************************************************************************/ +.global skein1024_nextBlock +skein1024_nextBlock: + adiw r24, 2 + rjmp ubi1024_nextBlock + +/******************************************************************************/ +.global skein1024_lastBlock +skein1024_lastBlock: + adiw r24, 2 + rjmp ubi1024_lastBlock + +/******************************************************************************/ +/* +void skein1024_ctx2hash(void* dest, skein1024_ctx_t* ctx){ + ubi1024_ctx_t uctx; + uint16_t outsize_b; + + uint64_t counter=0; + uint8_t outbuffer[UBI1024_BLOCKSIZE_B]; + ubi1024_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_OUT); + + outsize_b = ctx->outsize_b; + while(1){ + memcpy(&uctx, &(ctx->ubictx), sizeof(ubi1024_ctx_t)); + ubi1024_lastBlock(&uctx, &counter, 64); + ubi1024_ctx2hash(outbuffer, &uctx); + if(outsize_b<=UBI1024_BLOCKSIZE){ + memcpy(dest, outbuffer, (ctx->outsize_b+7)/8); + break; + }else{ + memcpy(dest, outbuffer, UBI1024_BLOCKSIZE_B); + dest = (uint8_t*)dest + UBI1024_BLOCKSIZE_B; + outsize_b -= UBI1024_BLOCKSIZE; + counter++; + } + } +} +*/ +/* + * param dest: r24:r25 + * param ctx: r22:r23 + */ + OUTSIZE_B0 = 16 + OUTSIZE_B1 = 17 + UCTX0 = 14 + UCTX1 = 15 + UBICTX0 = 12 + UBICTX1 = 13 + DEST0 = 10 + DEST1 = 11 +.global skein1024_ctx2hash +skein1024_ctx2hash: + push_range 10, 17 + /* 144 || 8 || 128 */ + stack_alloc_large 144+8+128 /* uctx || counter || outbuffer */ + movw DEST0, r24 + adiw r30, 1 + movw UCTX0, r30 + ldi r16, 144 + add r30, r16 + adc r31, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + movw r26, 22 + ld OUTSIZE_B0, X+ + ld OUTSIZE_B1, X+ + movw UBICTX0, r26 + /* call ubi1024_init */ + movw r24, UBICTX0 + adiw r24, 16 + movw r22, r24 + movw r24, UBICTX0 + ldi r20, 63 + rcall ubi1024_init + + /* main loop */ + /* copy ubictx in uctx*/ +1: movw r30, UCTX0 + movw r26, UBICTX0 + ldi r24, 144 +2: ld r25, X+ + st Z+, r25 + dec r24 + brne 2b + /* call ubi1024_lastBlock */ + movw r24, UCTX0 + adiw r24, 63 + adiw r24, 63 + adiw r24, 18 + movw r22, r24 + movw r24, UCTX0 + clr r21 + ldi r20, 64 + rcall ubi1024_lastBlock + /* copy uctx->g to outbuffer */ + movw r26, UCTX0 + adiw r26, 16 + movw r30, UCTX0 + adiw r30, 63 + adiw r30, 63 + adiw r30, 18+8 + ldi r24, 128 +2: ld r25, X+ + st Z+, r25 + dec r24 + brne 2b + /* compare outsize_b with 1024*/ + cpi OUTSIZE_B1, 5 + brge 5f + cpi OUTSIZE_B1, 4 + brlo 3f + tst OUTSIZE_B0 + breq 3f +5: /* copy outbuffer to dest */ + movw r30, DEST0 + movw r26, UCTX0 + adiw r26, 63 + adiw r26, 63 + adiw r26, 18+8 + ldi r24, 128 +6: ld r25, X+ + st Z+, r25 + dec r24 + brne 6b + /* store new dest */ + movw DEST0, r30 + /* adjust counter and outsize_b*/ + subi OUTSIZE_B1, 2 + movw r30, UCTX0 + adiw r30, 63 + adiw r30, 63 + adiw r30, 18 + ldi r24, 1 + ld r25, Z + add r25, r24 + st Z+, r25 + ldi r24, 7 +6: ld r25, Z + adc r25, r1 + st Z+, r25 + dec r24 + brne 6b + rjmp 1b +3: /* last iteraton */ + movw r24, OUTSIZE_B0 + adiw r24, 7 + lsr r25 + ror r24 + lsr r25 + ror r24 + lsr r25 + ror r24 + movw r30, DEST0 + movw r26, UCTX0 + adiw r26, 63 + adiw r26, 63 + adiw r26, 18+8 + tst r24 + breq 8f +7: ld r25, X+ + st Z+, r25 + dec r24 + brne 7b +8: + stack_free_large3 144+8+128 + pop_range 10, 17 + ret + +/******************************************************************************/ +/* +void skein1024(void* dest, uint16_t outlength_b, const void* msg, uint32_t length_b){ + skein1024_ctx_t ctx; + skein1024_init(&ctx, outlength_b); + while(length_b>SKEIN1024_BLOCKSIZE){ + skein1024_nextBlock(&ctx, msg); + msg = (uint8_t*)msg + SKEIN1024_BLOCKSIZE_B; + length_b -= SKEIN1024_BLOCKSIZE; + } + skein1024_lastBlock(&ctx, msg, length_b); + skein1024_ctx2hash(dest, &ctx); +} +*/ +/* + * param dest: r24:r25 + * param outlength_b: r22:r23 + * param msg: r20:r21 + * param length_b: r16:r19 + */ +LENGTH_B0 = 2 +LENGTH_B1 = 3 +LENGTH_B2 = 4 +LENGTH_B3 = 5 +DEST0 = 6 +DEST1 = 7 +MSG0 = 8 +MSG1 = 9 +CTX0 = 10 +CTX1 = 11 +.global skein1024 +skein1024: + push_range 2, 11 + stack_alloc_large 146 + adiw r30, 1 + movw CTX0, r30 + movw DEST0, r24 + movw MSG0, r20 + movw LENGTH_B0, r16 + movw LENGTH_B2, r18 + /* call skein1024_init */ + movw r24, r30 + rcall skein1024_init +1: tst LENGTH_B2 + brne 4f + tst LENGTH_B3 + brne 4f + /* call skein1024_lastBlock */ + movw r24, CTX0 + movw r22, MSG0 + movw r20, LENGTH_B0 + rcall skein1024_lastBlock + /* call skein1024_ctx2hash */ + movw r24, DEST0 + movw r22, CTX0 + rcall skein1024_ctx2hash + /* return */ + stack_free_large2 146 + pop_range 2, 11 + ret + +4: /* process preceeding blocks */ + movw r24, CTX0 + movw r22, MSG0 + rcall skein1024_nextBlock + ldi r24, 128 + add MSG0, r24 + adc MSG0, r1 + mov r24, LENGTH_B1 + mov r25, LENGTH_B2 + sbiw r24, 4 + sbc LENGTH_B3, r1 + mov LENGTH_B1, r24 + mov LENGTH_B2, r25 + rjmp 1b +