/* skein1024_asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * \author  Daniel Otte
 * \email   daniel.otte@rub.de
 * \date    2009-03-25
 * \license GPLv3 or later
 */

#include "avr-asm-macros.S"

/******************************************************************************/
/*
 * Reference implementation in C:
 *
void skein1024_init(skein1024_ctx_t* ctx, uint16_t outsize_b){
	skein_config_t conf;
	uint8_t null[UBI1024_BLOCKSIZE_B];
	memset(null, 0, UBI1024_BLOCKSIZE_B);
	memset(&conf, 0, sizeof(skein_config_t));
	conf.schema[0] = 'S';
	conf.schema[1] = 'H';
	conf.schema[2] = 'A';
	conf.schema[3] = '3';
	conf.version = 1;
	conf.out_length = outsize_b;
	ctx->outsize_b = outsize_b;
	ubi1024_init(&(ctx->ubictx), null, UBI_TYPE_CFG);
	ubi1024_lastBlock(&(ctx->ubictx), &conf, 256);
	ubi1024_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_MSG);
}
*/
/*
 * skein1024_init: store the requested output size in the context and run
 * the configuration block through UBI so the context is ready for message
 * blocks.
 *
 * param ctx:       r24:r25
 * param outsize_b: r22:r23
 *
 * Stack frame (32+128-22 = 138 bytes). The 32-byte conf and the 128-byte
 * all-zero block share storage: only the first 10 bytes of conf are
 * non-zero, so its remaining 22 bytes are simply the first 22 bytes of
 * the zero block.
 *
 *   CONF0 + 0   : 'S' 'H' 'A' '3'  1 0 0 0  outsize_b (2 bytes)
 *   CONF0 + 10  : 128 zero bytes (the "null" block)
 *
 * NOTE(review): assumes stack_alloc_large leaves the new stack pointer in
 * Z (r30:r31), which is why Z+1 is taken as the first free byte - confirm
 * against avr-asm-macros.S.
 */
UBICTX0 = 2
UBICTX1 = 3
CONF0   = 4
CONF1   = 5
.global skein1024_init
skein1024_init:
	push_range 2, 5
	stack_alloc_large 32+128-22
	adiw r30, 1
	movw CONF0, r30             /* CONF = first byte of the frame */
	/* ctx->outsize_b = outsize_b; the ubi context follows directly */
	movw r26, r24
	st X+, r22
	st X+, r23
	movw UBICTX0, r26           /* UBICTX = &ctx->ubictx (ctx + 2) */
	/* conf.schema = "SHA3" */
	ldi r24, 'S'
	st Z+, r24
	ldi r24, 'H'
	st Z+, r24
	ldi r24, 'A'
	st Z+, r24
	ldi r24, '3'
	st Z+, r24
	/* conf.version = 1, followed by three zero bytes */
	ldi r24, 1
	st Z+, r24
	st Z+, r1
	st Z+, r1
	st Z+, r1
	/* low 16 bits of conf.out_length */
	st Z+, r22
	st Z+, r23
	/* 128 zero bytes: rest of conf and the null block */
	ldi r24, 128
1:	st Z+, r1
	dec r24
	brne 1b
	/* call ubi1024_init(ubictx, null, 4)   (UBI_TYPE_CFG in the C code) */
	subi r30, lo8(128)          /* Z -= 128 -> start of the null block */
	sbci r31, hi8(128)
	movw r24, UBICTX0
	movw r22, r30
	ldi r20, 4
	rcall ubi1024_init
	/* call ubi1024_lastBlock(ubictx, conf, 256) */
	movw r24, UBICTX0
	movw r22, CONF0
	ldi r21, 1                  /* r21:r20 = 0x0100 = 256 bits */
	clr r20
	rcall ubi1024_lastBlock
	/* call ubi1024_init(ubictx, ubictx.g, 48)   (UBI_TYPE_MSG) */
	movw r24, UBICTX0
	adiw r24, 16                /* g is addressed at ubictx + 16 */
	movw r22, r24
	movw r24, UBICTX0
	ldi r20, 48
	rcall ubi1024_init
	stack_free_large2 32+128-22
	pop_range 2, 5
	ret

/******************************************************************************/
/*
 * skein1024_nextBlock: skip the 2-byte outsize_b field at the start of the
 * skein context and tail-call ubi1024_nextBlock on the embedded ubi context.
 * The remaining argument registers are passed through untouched.
 *
 * param ctx: r24:r25
 */
.global skein1024_nextBlock
skein1024_nextBlock:
	adiw r24, 2
	rjmp ubi1024_nextBlock

/******************************************************************************/
/*
 * skein1024_lastBlock: skip the 2-byte outsize_b field at the start of the
 * skein context and tail-call ubi1024_lastBlock on the embedded ubi context.
 * The remaining argument registers are passed through untouched.
 *
 * param ctx: r24:r25
 */
.global skein1024_lastBlock
skein1024_lastBlock:
	adiw r24, 2
	rjmp ubi1024_lastBlock

/******************************************************************************/
/*
 * Reference implementation in C:
 *
void skein1024_ctx2hash(void* dest, skein1024_ctx_t* ctx){
	ubi1024_ctx_t uctx;
	uint16_t outsize_b;
	uint64_t counter=0;
	uint8_t outbuffer[UBI1024_BLOCKSIZE_B];
	ubi1024_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_OUT);
	outsize_b = ctx->outsize_b;
	while(1){
		memcpy(&uctx, &(ctx->ubictx), sizeof(ubi1024_ctx_t));
		ubi1024_lastBlock(&uctx, &counter, 64);
		ubi1024_ctx2hash(outbuffer, &uctx);
		if(outsize_b<=UBI1024_BLOCKSIZE){
			memcpy(dest, outbuffer, (ctx->outsize_b+7)/8);
			break;
		}else{
			memcpy(dest, outbuffer, UBI1024_BLOCKSIZE_B);
			dest = (uint8_t*)dest + UBI1024_BLOCKSIZE_B;
			outsize_b -= UBI1024_BLOCKSIZE;
			counter++;
		}
	}
}
*/
/*
 * skein1024_ctx2hash: produce the hash output by running the output
 * transform once per 1024-bit output block, with an incrementing 64-bit
 * counter as input, and copying the result to dest.
 *
 * param dest: r24:r25
 * param ctx:  r22:r23
 *
 * Differences from the C reference above:
 *  - the final copy uses the REMAINING outsize_b (register copy), not
 *    ctx->outsize_b, so only the bytes still owed are written;
 *  - instead of calling ubi1024_ctx2hash, the 128 bytes at uctx + 16 are
 *    copied into outbuffer directly.
 *
 * Stack frame (144 + 8 + 128 = 280 bytes):
 *   UCTX0 + 0   : uctx      (144 bytes, working copy of the ubi context)
 *   UCTX0 + 144 : counter   (8 bytes)
 *   UCTX0 + 152 : outbuffer (128 bytes)
 *
 * NOTE(review): relies on r1 == 0 (avr-gcc zero register convention) and
 * on stack_alloc_large leaving the new stack pointer in Z - confirm.
 */
OUTSIZE_B0 = 16
OUTSIZE_B1 = 17
UCTX0      = 14
UCTX1      = 15
UBICTX0    = 12
UBICTX1    = 13
DEST0      = 10
DEST1      = 11
.global skein1024_ctx2hash
skein1024_ctx2hash:
	push_range 10, 17
	/*   144   ||    8    ||   128     */
	stack_alloc_large 144+8+128
	/*  uctx   || counter || outbuffer */
	movw DEST0, r24
	adiw r30, 1
	movw UCTX0, r30
	/* counter = 0 */
	ldi r16, 144
	add r30, r16
	adc r31, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	st Z+, r1
	/* outsize_b = ctx->outsize_b; UBICTX = &ctx->ubictx */
	movw r26, r22
	ld OUTSIZE_B0, X+
	ld OUTSIZE_B1, X+
	movw UBICTX0, r26
	/* call ubi1024_init(ubictx, ubictx.g, 63)   (UBI_TYPE_OUT) */
	movw r24, UBICTX0
	adiw r24, 16
	movw r22, r24
	movw r24, UBICTX0
	ldi r20, 63
	rcall ubi1024_init

	/* main loop */
	/* copy ubictx into uctx (144 bytes) */
1:	movw r30, UCTX0
	movw r26, UBICTX0
	ldi r24, 144
2:	ld r25, X+
	st Z+, r25
	dec r24
	brne 2b
	/* call ubi1024_lastBlock(uctx, &counter, 64) */
	movw r24, UCTX0
	adiw r24, 63                /* 63 + 63 + 18 = 144 -> &counter */
	adiw r24, 63
	adiw r24, 18
	movw r22, r24
	movw r24, UCTX0
	clr r21
	ldi r20, 64
	rcall ubi1024_lastBlock
	/* copy uctx->g (uctx + 16) to outbuffer (uctx + 152), 128 bytes */
	movw r26, UCTX0
	adiw r26, 16
	movw r30, UCTX0
	adiw r30, 63
	adiw r30, 63
	adiw r30, 18+8
	ldi r24, 128
2:	ld r25, X+
	st Z+, r25
	dec r24
	brne 2b
	/*
	 * compare outsize_b with 1024 (0x0400); continue only if it is larger.
	 * outsize_b is unsigned, so the high-byte test must use brsh: a signed
	 * brge would treat high bytes >= 0x80 as negative and could end the
	 * loop after the first block.
	 */
	cpi OUTSIZE_B1, 5
	brsh 5f                     /* >= 0x0500: more than one block left */
	cpi OUTSIZE_B1, 4
	brlo 3f                     /* <  0x0400: last block */
	tst OUTSIZE_B0
	breq 3f                     /* == 0x0400: last block */
5:	/* copy the full outbuffer (128 bytes) to dest */
	movw r30, DEST0
	movw r26, UCTX0
	adiw r26, 63
	adiw r26, 63
	adiw r26, 18+8
	ldi r24, 128
6:	ld r25, X+
	st Z+, r25
	dec r24
	brne 6b
	/* store new dest */
	movw DEST0, r30
	/*
	 * adjust counter and outsize_b:
	 * one block is 1024 bits = 0x0400, so the high byte drops by 4.
	 * (Subtracting 2 here would only account for 512 bits per 128 bytes
	 * written and make the routine write past the end of dest.)
	 */
	subi OUTSIZE_B1, 4
	movw r30, UCTX0
	adiw r30, 63
	adiw r30, 63
	adiw r30, 18                /* Z = &counter */
	/* counter++ (64 bit, least significant byte first) */
	ldi r24, 1
	ld r25, Z
	add r25, r24
	st Z+, r25
	ldi r24, 7
	/* ld/st/dec leave the carry flag alone, so adc ripples the carry */
6:	ld r25, Z
	adc r25, r1
	st Z+, r25
	dec r24
	brne 6b
	rjmp 1b
3:	/* last iteration: copy (outsize_b + 7) / 8 bytes, at most 128 */
	movw r24, OUTSIZE_B0
	adiw r24, 7
	lsr r25
	ror r24
	lsr r25
	ror r24
	lsr r25
	ror r24
	movw r30, DEST0
	movw r26, UCTX0
	adiw r26, 63
	adiw r26, 63
	adiw r26, 18+8
	tst r24
	breq 8f                     /* nothing left to copy */
7:	ld r25, X+
	st Z+, r25
	dec r24
	brne 7b
8:
	stack_free_large3 144+8+128
	pop_range 10, 17
	ret

/******************************************************************************/
/*
 * Reference implementation in C:
 *
void skein1024(void* dest, uint16_t outlength_b, const void* msg, uint32_t length_b){
	skein1024_ctx_t ctx;
	skein1024_init(&ctx, outlength_b);
	while(length_b>SKEIN1024_BLOCKSIZE){
		skein1024_nextBlock(&ctx, msg);
		msg = (uint8_t*)msg + SKEIN1024_BLOCKSIZE_B;
		length_b -= SKEIN1024_BLOCKSIZE;
	}
	skein1024_lastBlock(&ctx, msg, length_b);
	skein1024_ctx2hash(dest, &ctx);
}
*/
/*
 * skein1024: one-shot hash of a complete message.
 *
 * param dest:        r24:r25
 * param outlength_b: r22:r23
 * param msg:         r20:r21
 * param length_b:    r16:r19
 *
 * Unlike the C reference, full blocks are fed through
 * skein1024_nextBlock only while length_b does not fit into 16 bits
 * (upper two bytes non-zero); whatever remains is handed to
 * skein1024_lastBlock as a 16-bit bit count.
 *
 * The context (146 bytes) lives on the stack.
 *
 * NOTE(review): relies on r1 == 0 after the rcall (avr-gcc zero register
 * convention) and on stack_alloc_large leaving the new stack pointer in
 * Z - confirm.
 */
LENGTH_B0 = 2
LENGTH_B1 = 3
LENGTH_B2 = 4
LENGTH_B3 = 5
DEST0     = 6
DEST1     = 7
MSG0      = 8
MSG1      = 9
CTX0      = 10
CTX1      = 11
.global skein1024
skein1024:
	push_range 2, 11
	stack_alloc_large 146
	adiw r30, 1
	movw CTX0, r30
	movw DEST0, r24
	movw MSG0, r20
	movw LENGTH_B0, r16
	movw LENGTH_B2, r18
	/* call skein1024_init(ctx, outlength_b); r22:r23 is still untouched */
	movw r24, r30
	rcall skein1024_init
1:	/* more than 16 bits of length left? then process one full block */
	tst LENGTH_B2
	brne 4f
	tst LENGTH_B3
	brne 4f
	/* call skein1024_lastBlock(ctx, msg, (uint16_t)length_b) */
	movw r24, CTX0
	movw r22, MSG0
	movw r20, LENGTH_B0
	rcall skein1024_lastBlock
	/* call skein1024_ctx2hash(dest, ctx) */
	movw r24, DEST0
	movw r22, CTX0
	rcall skein1024_ctx2hash
	/* return */
	stack_free_large2 146
	pop_range 2, 11
	ret
4:	/* process preceding blocks */
	movw r24, CTX0
	movw r22, MSG0
	rcall skein1024_nextBlock
	/* msg += 128; the carry must go into the HIGH pointer byte */
	ldi r24, 128
	add MSG0, r24
	adc MSG1, r1
	/* length_b -= 1024: subtract 4 from bytes 1..3 of length_b */
	mov r24, LENGTH_B1
	mov r25, LENGTH_B2
	sbiw r24, 4
	sbc LENGTH_B3, r1           /* mov does not touch the borrow flag */
	mov LENGTH_B1, r24
	mov LENGTH_B2, r25
	rjmp 1b