/* twister-large-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/.
*/
/**
 * \file     twister-large-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2008-12-27
 * \license  GPLv3 or later
 *
 */

#include "avr-asm-macros.S"

/*
 * One byte of the checksum fold:
 *   acc      = acc (op) *--Y        op is add for the first byte, adc after
 *   *--Z    ^= acc
 * ld, eor and st leave the carry flag untouched, so the carry produced by
 * op is still valid when the next step runs its adc.
 */
.macro cksum_fold op, acc
	ld   r0, -Y
	\op  \acc, r0
	ld   r0, -Z
	eor  r0, \acc
	st   Z, r0
.endm

/* void checksum_update(twister_large_ctx_t *ctx, uint8_t col) */
/*
 * param ctx: r24:r25
 * param col: r22
 *
 * File-local helper (not exported).
 *
 * Reads the bytes at ctx+0, ctx+8, ..., ctx+56 (first byte of every 8-byte
 * state row), adds them with carry propagation to the 8-byte checksum row
 * (col+1)%8 and XORs the resulting sum bytes into checksum row col.
 * Both rows are walked from their last byte down to their first byte.
 * The checksum rows are addressed relative to ctx+80.
 *
 * r1 is assumed to hold zero.
 * saved/restored: r16, r28, r29
 * clobbered:      r0, r18-r26, r30, r31, SREG
 */
checksum_update:
	push r16
	push r28
	push r29

	mov  r16, r22           /* r16 = col                               */
	lsl  r16
	lsl  r16
	lsl  r16                /* r16 = 8*col, byte offset of the row     */
	movw r30, r24           /* Z points to ctx->state.s                */

	/* fetch the eight state bytes; r18 is consumed first, r25 last */
	ldd  r25, Z+0*8
	ldd  r24, Z+1*8
	ldd  r23, Z+2*8
	ldd  r22, Z+3*8
	ldd  r21, Z+4*8
	ldd  r20, Z+5*8
	ldd  r19, Z+6*8
	ldd  r18, Z+7*8

	/* adiw takes at most 63, so the offset of 88 is added in two steps */
	adiw r30, 63
	adiw r30, 1+3*8         /* Z points at ctx->checksum[0][8]         */
	movw r28, r30           /* Y points at ctx->checksum[0][8]         */

	andi r16, 63
	add  r30, r16           /* Z points at ctx->checksum[col][8]       */
	adc  r31, r1

	ldi  r26, 8
	add  r16, r26
	andi r16, 63            /* wrap around to row 0 after row 7        */
	add  r28, r16
	adc  r29, r1            /* Y points at ctx->checksum[(col+1)%8][8] */

	cksum_fold add, r18
	cksum_fold adc, r19
	cksum_fold adc, r20
	cksum_fold adc, r21
	cksum_fold adc, r22
	cksum_fold adc, r23
	cksum_fold adc, r24
	cksum_fold adc, r25

	pop  r29
	pop  r28
	pop  r16
	ret

/*********************************************************************/
/* void twister_large_init(twister_large_ctx_t *ctx, uint16_t hashsize_b)*/
/*
 * param ctx:        r24:r25
 * param hashsize_b: r22:r23
 *
 * twister384_init and twister512_init load hashsize_b with 384 or 512 and
 * continue in twister_large_init.
 *
 * Resulting context contents (byte offsets from ctx):
 *     0 ..  63   0x00, except byte 7 = hi8(hashsize_b), byte 15 = lo8(hashsize_b)
 *    64 ..  71   0xFF
 *    72 .. 143   0x00
 *
 * r1 is assumed to hold zero on entry and holds zero again on return.
 * clobbered: r24, r30, r31, SREG
 */
.global twister384_init
twister384_init:
	ldi r22, lo8(384)
	ldi r23, hi8(384)
	rjmp twister_large_init

.global twister512_init
twister512_init:
	ldi r22, lo8(512)
	ldi r23, hi8(512)
	/* falls through into twister_large_init */

.global twister_large_init
twister_large_init:
	movw r30, r24
	ldi r24, 64
1:
	st Z+, r1               /* bytes 0..63 = 0x00 */
	dec r24
	brne 1b

	dec r1                  /* r1 = 0xFF (temporarily) */
	ldi r24, 8
1:
	st Z+, r1               /* bytes 64..71 = 0xFF */
	dec r24
	brne 1b

	inc r1                  /* r1 = 0x00 again */
	ldi r24, 8+64
1:
	st Z+, r1               /* bytes 72..143 = 0x00 */
	dec r24
	brne 1b

	/* Z is at ctx+144; step back to ctx+7 */
	subi r30, lo8(1+8+8+8*7+64)
	sbci r31, hi8(1+8+8+8*7+64)
	st Z, r23               /* ctx[7]  = hashsize_b >> 8   */
	std Z+8, r22            /* ctx[15] = hashsize_b & 0xff */
	ret

/*********************************************************************/
/* void twister_large_nextBlock(twister_state_t *ctx, void *msg) */
/*
 * param ctx: r24:r25
 * param msg: r22:r23
 *
 * Processes one 64-byte message block:
 *   - the first 64 bytes of ctx are copied into a 64-byte stack buffer
 *   - three maxi rounds are run; before every mini round the checksum
 *     is updated for the matching column (0..7), and each mini round is
 *     handed the next 8 message bytes
 *   - after every maxi round the state is XORed with the buffer (after
 *     the first two maxi rounds the buffer receives the XOR result too)
 *   - 512 is added to the 64-bit little-endian counter at ctx+72
 *
 * The ctx pointer is also passed to checksum_update, which addresses
 * data behind the first 80 bytes, so ctx has to be a full large context.
 *
 * NOTE(review): stack_alloc_large (avr-asm-macros.S) is assumed to leave
 * r30:r31 pointing one byte below the reserved area - confirm there.
 */
CTX_SAVE0 = 14
CTX_SAVE1 = 15
TMP_SAVE0 = 12
TMP_SAVE1 = 13
MSG_SAVE0 = 28
MSG_SAVE1 = 29
.global twister_large_nextBlock
.global twister384_nextBlock
.global twister512_nextBlock

twister384_nextBlock:
twister512_nextBlock:
twister_large_nextBlock:
	push_range 12, 15
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1
	movw TMP_SAVE0, r30     /* TMP_SAVE = 64-byte stack buffer */
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22

	/* buffer = first 64 bytes of ctx, eight bytes per iteration */
	movw r26, CTX_SAVE0
	ldi r18, 64/8
1:
	.rept 8
	ld r0, X+
	st Z+, r0
	.endr
	dec r18
	brne 1b

	/* maxi round 1 */
	movw r24, CTX_SAVE0
	ldi r22, 0
	rcall checksum_update
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	ldi r22, 1
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	ldi r22, 2
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	/* state ^= buffer; buffer = new state */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* maxi round 2 */
	movw r24, CTX_SAVE0
	ldi r22, 3
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	movw r24, CTX_SAVE0
	ldi r22, 4
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	/* state ^= buffer; buffer = new state */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* maxi round 3 */
	movw r24, CTX_SAVE0
	ldi r22, 5
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	ldi r22, 6
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	ldi r22, 7
	rcall checksum_update
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* state ^= buffer (the buffer is not needed any more) */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z+
	eor r0, r23
	st X+, r0
	dec r18
	brne 1b

	/*
	 * Counter at ctx+72..79 += 512: X is at ctx+64 here, so X+9 is the
	 * second counter byte; adding 2 there equals adding 0x200.
	 * The carry is then propagated through the remaining six bytes
	 * (ctx+74..79).  The previous code ran one step further and did a
	 * read-modify-write on ctx+80, the first checksum byte.
	 */
	adiw r26, 9
	ldi r19, 2
	ld r0, X
	add r0, r19
	st X+, r0
	.rept 6
	ld r0, X
	adc r0, r1
	st X+, r0
	.endr

	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 15
	ret

/*********************************************************************/
/* void twister_large_lastBlock(twister_state_t *ctx, void *msg, uint16_t length_b) */
/*
 * param ctx:      r24:r25
 * param msg:      r22:r23
 * param length_b: r20:r21
 *
 * Finishes a message of length_b remaining bits:
 *   1. every complete 512-bit block is passed to twister_large_nextBlock
 *   2. the remaining bits are copied to a 64-byte stack buffer, a single
 *      1 bit is appended, the rest is zero filled and the buffer is
 *      processed as one more block
 *   3. the counter at ctx+72 is reduced by (512 - remaining bits), which
 *      takes back the surplus added for the padded block, and is fed
 *      into a mini round
 *   4. each 8-byte checksum row (ctx+80 ...) is byte-reversed in place
 *      and the 64 checksum bytes are processed by twister_small_nextBlock
 *
 * The first message byte is always read and one byte behind the copied
 * bytes is prefetched, even when it is not used afterwards.
 *
 * NOTE(review): stack_alloc_large (avr-asm-macros.S) is assumed to leave
 * r30:r31 pointing one byte below the reserved area - confirm there.
 */
TMP_SAVE0 = 12
TMP_SAVE1 = 13
CTX_SAVE0 = 14
CTX_SAVE1 = 15
LEN_SAVE0 = 16
LEN_SAVE1 = 17
MSG_SAVE0 = 28
MSG_SAVE1 = 29
.global twister_large_lastBlock
.global twister384_lastBlock
.global twister512_lastBlock

twister384_lastBlock:
twister512_lastBlock:
twister_large_lastBlock:
	push_range 12, 17
	push r28
	push r29
	stack_alloc_large 64
	adiw r30, 1
	movw TMP_SAVE0, r30     /* TMP_SAVE = 64-byte stack buffer */
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22
	movw LEN_SAVE0, r20

	/* while (length_b >= 512): process a full block */
1:
	cpi LEN_SAVE1, 2
	brlo 2f                 /* unsigned compare; brmi misjudged lengths >= 0x8200 */
	movw r24, CTX_SAVE0
	movw r22, MSG_SAVE0
	rcall twister_large_nextBlock
	adiw MSG_SAVE0, 32      /* a block is 64 bytes (was advanced by 8 only); */
	adiw MSG_SAVE0, 32      /* adiw is limited to 63, hence two steps        */
	subi LEN_SAVE1, 2       /* length_b -= 512 */
	rjmp 1b

	/* here length_b < 512 */
2:
	movw r18, LEN_SAVE0
	lsr r19
	ror r18
	lsr r18
	lsr r18                 /* r18 = length_b / 8 */
	ldi r19, 63
	movw r26, MSG_SAVE0
	movw r30, TMP_SAVE0
	sub r19, r18            /* r18: bytes to copy, r19: bytes to clear */

	ld r0, X+               /* r0 always holds the next message byte */
	tst r18
	breq 4f
3:
	st Z+, r0
	ld r0, X+
	dec r18
	brne 3b
4:
	mov r18, LEN_SAVE0
	andi r18, 0x07          /* number of bits in the partial byte */
	ldi r20, 0x80           /* ldi keeps the flags of andi intact */
	breq 6f                 /* no partial byte: padding byte is 0x80 */
5:
	lsr r20
	dec r18
	brne 5b
	or r20, r0              /* partial message byte | padding bit */
6:
	st Z+, r20
	tst r19
	breq 8f
7:
	st Z+, r1               /* zero fill the rest of the buffer */
	dec r19
	brne 7b
8:
	movw r24, CTX_SAVE0
	movw r22, TMP_SAVE0
	rcall twister_large_nextBlock

	/* r19:r18 = 512 - length_b */
	ldi r19, 2
	clr r18
	sub r18, LEN_SAVE0
	sbc r19, LEN_SAVE1

	/* 64-bit counter at ctx+72 -= r19:r18 */
	movw r26, CTX_SAVE0
	adiw r26, 63
	adiw r26, 1+8
	ld r0, X
	sub r0, r18
	st X+, r0
	ld r0, X
	sbc r0, r19
	st X+, r0
	.rept 6
	ld r0, X
	sbc r0, r1
	st X+, r0
	.endr

	/* mini round with the counter as message input */
	sbiw r26, 8
	movw r24, CTX_SAVE0
	movw r22, r26
	rcall twister_mini_round

	/*
	 * Byte-reverse each of the eight 8-byte checksum rows in place.
	 * r12/r13 and r16/r17 are free for this, the buffer pointer and the
	 * length are not used any more.
	 */
	movw r24, CTX_SAVE0
	movw r22, CTX_SAVE0
	ldi r16, 64+8+8
	add r22, r16
	adc r23, r1
	movw r30, r22           /* Z = ctx + 80 */
	ldi r26, 8
1:
	ld r12, Z+
	ld r13, Z+
	ld r16, Z+
	ld r17, Z+
	ld r18, Z+
	ld r19, Z+
	ld r20, Z+
	ld r21, Z+
	st -Z, r12
	st -Z, r13
	st -Z, r16
	st -Z, r17
	st -Z, r18
	st -Z, r19
	st -Z, r20
	st -Z, r21
	adiw r30, 8             /* next row */
	dec r26
	brne 1b

	/* process the checksum as a final message block */
	movw r24, CTX_SAVE0
	movw r22, CTX_SAVE0
	ldi r26, 64+2*8
	add r22, r26
	adc r23, r1
	rcall twister_small_nextBlock

	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 17
	ret