/* twister-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * \file     twister-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2008-12-22
 * \license  GPLv3 or later
 *
 */

#include "avr-asm-macros.S"

/*
 * General conventions used throughout this file:
 *  - r1 is expected to hold zero (avr-gcc "zero register"); it is used as
 *    the zero operand of adc when a byte offset is added to a pointer.
 *  - push_range / pop_range / stack_alloc_large / stack_free_large are
 *    macros provided by avr-asm-macros.S (not visible in this file).
 *  - Numeric local labels (1:, 2:, ...) are reused; "1b"/"2f" always refer
 *    to the nearest definition backwards/forwards.
 */

/*
 * 256-entry byte substitution table. It is read with lpm in
 * twister_blank_round, i.e. it lives in program memory.
 * The values are those of the AES (Rijndael) S-box.
 */
twister_sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

/*********************************************************************/
/*
 * twister_mini_round
 *
 * XORs eight message bytes, taken in reverse order (msg[7] first), into
 * ctx bytes 56..63 and then FALLS THROUGH into twister_blank_round
 * (there is intentionally no ret here).
 *
 * param ctx: r24:r25  (left untouched, so twister_blank_round sees it)
 * param msg: r22:r23
 *
 * scratch: r21, r22, r23, X (r26:r27), Z (r30:r31)
 */
.global twister_mini_round
twister_mini_round:
	movw r26, r24
	movw r30, r22
	adiw r26, 8*7          /* X = ctx + 56                       */
	adiw r30, 8            /* Z = msg + 8, walked with pre-decr. */
	ldi r21, 8
1:
	ld r22, X
	ld r23, -Z
	eor r22, r23
	st X+, r22
	dec r21
	brne 1b
	/* fall through into twister_blank_round */

/*********************************************************************/
/*
 * twister_blank_round
 *
 * param ctx: r24:r25
 *
 * Steps performed on the 64 bytes at ctx (handled as 8 rows of 8 bytes):
 *  1. XOR the 8 bytes at ctx+64..ctx+71 ("counter"), highest address
 *     first, into byte 1 of every row.
 *  2. Decrement that 8-byte counter as one little-endian number.
 *  3. Substitute every byte via twister_sbox and store it into a 64-byte
 *     stack buffer; within each row the destination column is rotated.
 *  4. Multiply by the rotating MDS coefficients (gf256mul) and write the
 *     result back to ctx.
 *
 * r4..r17, r28 and r29 are saved and restored; r0 and r18..r27, r30, r31
 * are used as scratch.
 */
X_SAVE0 = 4
X_SAVE1 = 5
Y_SAVE0 = 6
Y_SAVE1 = 7
MDS0 = 8
MDS1 = 9
MDS2 = 10
MDS3 = 11
MDS4 = 12
MDS5 = 13
MDS6 = 14
MDS7 = 15

.global twister_blank_round
twister_blank_round:
	push_range 4, 17
	push r28
	push r29
	stack_alloc_large 64, r28, r29
	movw X_SAVE0, r24      /* remember ctx for the MDS stage */

	/* --- step 1: "add" counter ----------------------------------- */
	movw r30, r24
	adiw r30, 63
	adiw r30, 1+8          /* Z points behind counter */
	movw r26, r24
	adiw r26, 1            /* X = ctx + 1 (byte 1 of row 0) */
	ldi r22, 8
1:
	ld r16, -Z
	ld r21, X
	eor r21, r16
	st X, r21
	adiw r26, 8            /* next row */
	dec r22
	brne 1b

	/* --- step 2: decrement counter ------------------------------- */
	/* Z is back at ctx+64 and r16 still holds the byte loaded from
	 * there. The borrow travels in the carry flag; ld, st, dec and
	 * brne leave carry untouched. */
	subi r16, 1
	st Z+, r16
	ldi r17, 7
1:
	ld r16, Z
	sbci r16, 0
	st Z+, r16
	dec r17
	brne 1b

	/* --- step 3: sbox substitution into the stack buffer --------- */
	movw r26, r24          /* X = ctx, read sequentially */
	adiw r28, 1            /* Y points to stack memory */
	movw Y_SAVE0, r28
	movw r24, r28          /* r24:r25 = base of current buffer row */
	ldi r20, lo8(twister_sbox)
	ldi r21, hi8(twister_sbox)
	ldi r18, 8             /* row counter, counts 8 -> 1 */
1:
	ldi r19, 0             /* column counter, counts 0 -> 7 */
2:
	ld r0, X+
	movw r30, r20
	add r30, r0
	adc r31, r1
	lpm r0, Z              /* r0 = twister_sbox[r0] */
	movw r28, r24
	mov r16, r18
	add r16, r19
	andi r16, 0x07         /* destination column = (r18 + r19) mod 8 */
	add r28, r16
	adc r29, r1
	st Y, r0
	inc r19
	cpi r19, 8
	brne 2b
	adiw r24, 8            /* next buffer row */
	dec r18
	brne 1b

	/* --- step 4: MDS multiplication ------------------------------ */
	/* load MDS-Table to MDS0:MDS7 */
	ldi r18, 1
	mov MDS1, r18
	mov MDS2, r18
	mov MDS7, r18
	ldi r18, 2
	mov MDS0, r18
	ldi r18, 5
	mov MDS3, r18
	ldi r18, 6
	mov MDS6, r18
	ldi r18, 7
	mov MDS4, r18
	ldi r18, 8
	mov MDS5, r18

	ldi r20, 0x4D          /* reducer for gf256mul */
	ldi r16, 0             /* column index 0..7 */
1:
	movw r26, X_SAVE0
	add r26, r16
	adc r27, r1            /* X = ctx + column */
	ldi r17, 8             /* eight output rows per column */
2:
	/* r0 = XOR over k of gf256mul(MDSk, buffer[k][column]) */
	mov r24, MDS0
	movw r28, Y_SAVE0
	add r28, r16
	adc r29, r1            /* Y = buffer + column */
	ld r22, Y
	rcall gf256mul
	mov r0, r24
	mov r24, MDS1
	ldd r22, Y+8
	rcall gf256mul
	eor r0, r24
	mov r24, MDS2
	ldd r22, Y+8*2
	rcall gf256mul
	eor r0, r24
	mov r24, MDS3
	ldd r22, Y+8*3
	rcall gf256mul
	eor r0, r24
	mov r24, MDS4
	ldd r22, Y+8*4
	rcall gf256mul
	eor r0, r24
	mov r24, MDS5
	ldd r22, Y+8*5
	rcall gf256mul
	eor r0, r24
	mov r24, MDS6
	ldd r22, Y+8*6
	rcall gf256mul
	eor r0, r24
	mov r24, MDS7
	ldd r22, Y+8*7
	rcall gf256mul
	eor r0, r24
	st X, r0
	adiw r26, 8            /* same column, next row of ctx */

	/* rotate the coefficients by one position for the next row;
	 * after eight rotations they are back in their initial order */
	mov r0, MDS7
	mov MDS7, MDS6
	mov MDS6, MDS5
	mov MDS5, MDS4
	mov MDS4, MDS3
	mov MDS3, MDS2
	mov MDS2, MDS1
	mov MDS1, MDS0
	mov MDS0, r0
	dec r17
	brne 2b

	inc r16
	cpi r16, 8
	brne 1b

	stack_free_large 64
	pop r29
	pop r28
	pop_range 4, 17
	ret

/*********************************************************************/
/*
 * gf256mul - shift-and-add multiplication of two bytes with reduction.
 *
 * in:   r24 = first factor, r22 = second factor, r20 = reducer byte
 *             (twister_blank_round passes 0x4D)
 * out:  r24 = product
 * clobbers: r22, r23
 *
 * The first factor is consumed bit by bit from its least significant
 * end; the loop terminates as soon as no set bits remain in it.
 */
A = 23
B = 22
P = 24
gf256mul:
	mov A, r24
	clr P
1:
	lsr A                  /* carry = next bit of A */
	breq 4f                /* A is now zero: handle last bit and leave */
	brcc 2f
	eor P, B
2:
	lsl B                  /* B = B * x ... */
	brcc 3f
	eor B, r20             /* ... reduced when a bit was shifted out */
3:
	rjmp 1b
4:
	brcc 2f
	eor P, B
2:
	ret

/*********************************************************************/
/* twister_ctx2hash */
/*
 * param dest:       r24:r25
 * param ctx:        r22:r23
 * param hashsize_b: r20:r21
 *
 * Emits hashsize_b bits to dest: 8 bytes per iteration, plus one final
 * 4-byte chunk when bit 5 of hashsize_b is set (e.g. 224).
 * Each iteration:
 *     tmp   <- ctx[0..63]
 *     twister_blank_round(ctx)
 *     ctx[0..63] ^= tmp
 *     twister_blank_round(ctx)
 *     output ctx[8*k] ^ tmp[8*k] for k = 7..0 (k = 3..0 for the 4-byte chunk)
 *
 * The twisterNNN_ctx2hash entry points only preload hashsize_b.
 */
DEST_SAVE0 = 10
DEST_SAVE1 = 11
CTX_SAVE0 = 12
CTX_SAVE1 = 13
LEN_SAVE = 14
LEN32_SAVE = 15
TMP_SAVE0 = 16
TMP_SAVE1 = 17

.global twister_ctx2hash
.global twister_small_ctx2hash
.global twister_large_ctx2hash
.global twister224_ctx2hash
.global twister256_ctx2hash
.global twister384_ctx2hash
.global twister512_ctx2hash

twister224_ctx2hash:
	ldi r20, lo8(224)
	ldi r21, hi8(224)
	rjmp twister_ctx2hash

twister256_ctx2hash:
	ldi r20, lo8(256)
	ldi r21, hi8(256)
	rjmp twister_ctx2hash

twister384_ctx2hash:
	ldi r20, lo8(384)
	ldi r21, hi8(384)
	rjmp twister_ctx2hash

twister512_ctx2hash:
	ldi r20, lo8(512)
	ldi r21, hi8(512)
	/* no "rjmp twister_ctx2hash" needed: falls through */

twister_large_ctx2hash:
twister_small_ctx2hash:
twister_ctx2hash:
	push_range 10, 17
	push r28
	push r29
	stack_alloc_large 64   /* default registers: result expected in r30:r31 */
	movw DEST_SAVE0, r24
	movw CTX_SAVE0, r22

	/* LEN32_SAVE = 1 if a trailing 32-bit chunk is requested, else 0 */
	clr LEN32_SAVE
	sbrc r20, 5
	inc LEN32_SAVE

	/* LEN_SAVE = hashsize_b / 64 + LEN32_SAVE */
	lsr r21
	ror r20
	lsr r21
	ror r20
	/* length is max 512 so we now only have to shift r20 */
	swap r20               /* this is faster than 4 shifts */
	andi r20, 0x0f
	add r20, LEN32_SAVE
	mov LEN_SAVE, r20

	adiw r30, 1            /* first byte of the 64-byte stack buffer */
	movw TMP_SAVE0, r30
1:
	dec LEN_SAVE
	brmi 9f                /* all chunks written */

	/* tmp <- ctx-s */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r20, 64/4
3:
	ld r0, X+
	st Z+, r0
	ld r0, X+
	st Z+, r0
	ld r0, X+
	st Z+, r0
	ld r0, X+
	st Z+, r0
	dec r20
	brne 3b

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* ctx-s ^= tmp */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r20, 64
3:
	ld r0, X
	ld r21, Z+
	eor r0, r21
	st X+, r0
	dec r20
	brne 3b

	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* last iteration AND a 32-bit tail requested -> 4-byte output */
	movw r26, CTX_SAVE0
	tst LEN_SAVE
	brne 2f
	tst LEN32_SAVE
	brne 5f
2:
	/* 8-byte output: first byte of rows 7 down to 0 */
	adiw r26, 8*7
	movw r30, TMP_SAVE0
	adiw r30, 8*7
	movw r28, DEST_SAVE0
	ldi r20, 8
3:
	ld r0, Z
	ld r21, X
	eor r0, r21
	st Y+, r0
	sbiw r26, 8
	sbiw r30, 8
	dec r20
	brne 3b
	movw DEST_SAVE0, r28   /* keep the advanced dest pointer */
	rjmp 1b
5:
	/* 4-byte output: first byte of rows 3 down to 0 */
	adiw r26, 8*3
	movw r30, TMP_SAVE0
	adiw r30, 8*3
	movw r28, DEST_SAVE0
	ldi r20, 4
3:
	ld r0, Z
	ld r21, X
	eor r0, r21
	st Y+, r0
	sbiw r26, 8
	sbiw r30, 8
	dec r20
	brne 3b
9:
	stack_free_large 64
	pop r29
	pop r28
	pop_range 10, 17
	ret

/*********************************************************************/
/* void twister_small_nextBlock(twister_state_t *ctx, void *msg) */
/*
 * param ctx: r24:r25
 * param msg: r22:r23
 *
 * Processes 64 message bytes as eight 8-byte pieces:
 *     tmp <- ctx[0..63]
 *     3 x twister_mini_round ; ctx ^= tmp ; tmp <- ctx
 *     3 x twister_mini_round ; ctx ^= tmp ; tmp <- ctx
 *     2 x twister_mini_round ; twister_blank_round ; ctx ^= tmp
 * and finally adds 512 to the little-endian field starting at ctx+72.
 *
 * r12..r15, r28 and r29 are saved and restored.
 */
CTX_SAVE0 = 14
CTX_SAVE1 = 15
TMP_SAVE0 = 12
TMP_SAVE1 = 13
MSG_SAVE0 = 28
MSG_SAVE1 = 29

.global twister_small_nextBlock
.global twister224_nextBlock
.global twister256_nextBlock

twister224_nextBlock:
twister256_nextBlock:
twister_small_nextBlock:
	push_range 12, 15
	push r28
	push r29
	stack_alloc_large 64   /* default registers: result expected in r30:r31 */
	adiw r30, 1            /* first byte of the 64-byte stack buffer */
	movw TMP_SAVE0, r30
	movw CTX_SAVE0, r24
	movw MSG_SAVE0, r22

	/* tmp <- ctx[0..63], eight bytes per loop pass */
	movw r26, CTX_SAVE0
	ldi r18, 64/8
1:
.rept 8
	ld r0, X+
	st Z+, r0
.endr
	dec r18
	brne 1b

	/* mini rounds 1..3 (r24/r22 still hold ctx/msg for the first one) */
	rcall twister_mini_round
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	/* feed forward: ctx ^= tmp, tmp <- ctx */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* mini rounds 4..6 */
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round

	/* feed forward: ctx ^= tmp, tmp <- ctx */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z
	eor r0, r23
	st X+, r0
	st Z+, r0
	dec r18
	brne 1b

	/* mini rounds 7..8 followed by one blank round */
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	adiw MSG_SAVE0, 8
	movw r22, MSG_SAVE0
	movw r24, CTX_SAVE0
	rcall twister_mini_round
	movw r24, CTX_SAVE0
	rcall twister_blank_round

	/* final feed forward: ctx ^= tmp (tmp is not updated any more) */
	movw r30, TMP_SAVE0
	movw r26, CTX_SAVE0
	ldi r18, 64
1:
	ld r0, X
	ld r23, Z+
	eor r0, r23
	st X+, r0
	dec r18
	brne 1b

	/*
	 * X is now ctx+64. Add 512 to the little-endian field that starts at
	 * ctx+72 by adding 2 to its second byte (ctx+73) and rippling the
	 * carry upwards.
	 * NOTE(review): that field is presumably the 64-bit length counter of
	 * twister_state_t - the struct layout is not visible in this file;
	 * confirm against twister.h.
	 * The carry is propagated through ctx+74..ctx+79 only. Rippling one
	 * byte further (as the previous version did) read-modify-writes
	 * ctx+80, which is outside a 64-bit field starting at ctx+72.
	 */
	adiw r26, 9
	ldi r19, 2
	ld r0, X
	add r0, r19
	st X+, r0
.rept 6
	ld r0, X
	adc r0, r1
	st X+, r0
.endr

	stack_free_large 64
	pop r29
	pop r28
	pop_range 12, 15
	ret