/* twister-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * \file     twister-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2008-12-22
 * \license  GPLv3 or later
 *
 */

#include "avr-asm-macros.S"

/*
 * 256-entry byte substitution table. twister_blank_round reads it from
 * program memory with lpm, indexed by the byte being substituted.
 * (The values coincide with the AES S-box.)
 */
twister_sbox:
	.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

/* ---- layout constants ------------------------------------------------ */

STATE_SIZE   = 64	/* bytes processed as an 8x8 matrix                */
ROW_STRIDE   = 8	/* distance between vertically adjacent bytes      */
COUNTER_SIZE = 8	/* bytes of the counter stored right after them    */
GF_REDUCER   = 0x4D	/* reducer for gf256mul                            */

/* ---- register roles (numeric register aliases) ----------------------- */

STATE_PTR_LO = 4	/* r4:r5  - copy of the ctx pointer                */
STATE_PTR_HI = 5
SBUF_PTR_LO  = 6	/* r6:r7  - start of the 64-byte stack buffer      */
SBUF_PTR_HI  = 7
MDS0 = 8		/* r8..r15 - the eight MDS coefficients            */
MDS1 = 9
MDS2 = 10
MDS3 = 11
MDS4 = 12
MDS5 = 13
MDS6 = 14
MDS7 = 15
COL_IDX      = 18	/* r18 - column offset (0..7) into a buffer row    */
ROW_SHADOW   = 19	/* r19 - receives a copy of r17; never read back   */
GF_RED_REG   = 20	/* r20 - holds GF_REDUCER while gf256mul is in use */
GF_B         = 22	/* r22 - gf256mul: second factor (destroyed)       */
GF_A         = 23	/* r23 - gf256mul: working copy of first factor    */
GF_P         = 24	/* r24 - gf256mul: first factor in, product out    */

/*
 * twister_mds_term coef, row
 *
 * Advance COL_IDX by one (mod 8), multiply the stack-buffer byte at
 * [row][COL_IDX] by coef with gf256mul and XOR the product into r0.
 * Uses Y, r22, r23 and r24 as scratch.
 */
.macro twister_mds_term coef, row
	mov   r24, \coef
	movw  r28, SBUF_PTR_LO
	adiw  r28, ROW_STRIDE*\row
	inc   COL_IDX
	andi  COL_IDX, 0x07
	add   r28, COL_IDX
	adc   r29, r1
	ld    r22, Y
	rcall gf256mul
	eor   r0, r24
.endm

/*
 * twister_mds_rotate
 *
 * Rotate the coefficient registers by one position:
 * MDS0 -> MDS1 -> ... -> MDS7 -> MDS0. Uses r0 as scratch.
 */
.macro twister_mds_rotate
	mov   r0,   MDS7
	mov   MDS7, MDS6
	mov   MDS6, MDS5
	mov   MDS5, MDS4
	mov   MDS4, MDS3
	mov   MDS3, MDS2
	mov   MDS2, MDS1
	mov   MDS1, MDS0
	mov   MDS0, r0
.endm

/*
 * twister_blank_round
 *
 * param ctx: r24:r25
 *
 * Works in place on the memory ctx points to:
 *   bytes  0..63  are handled as an 8x8 byte matrix with row stride 8,
 *   bytes 64..71  are handled as a little-endian 64-bit counter.
 *
 * Steps, in this order:
 *   1. XOR the counter bytes, most significant first, into the matrix
 *      bytes at offsets 1, 9, 17, ..., 57.
 *   2. Decrement the counter by one.
 *   3. Run all 64 matrix bytes through twister_sbox into a 64-byte
 *      buffer on the stack.
 *   4. Overwrite every matrix byte with the XOR of eight gf256mul
 *      products of buffer bytes and the coefficients in MDS0..MDS7.
 *
 * r1 is read as a constant zero throughout and is never written.
 * push_range, pop_range, stack_alloc_large and stack_free_large come from
 * avr-asm-macros.S; presumably they save/restore r4..r17 and reserve/
 * release the 64-byte buffer - confirm against that file.
 */
.global twister_blank_round
twister_blank_round:
	push_range 4, 17
	push  r28
	push  r29
	stack_alloc_large 64, r28, r29

	/* ---- step 1: "add" counter ---------------------------------- */
	movw  r30, r24
	adiw  r30, STATE_SIZE-1
	adiw  r30, 1+COUNTER_SIZE	/* Z points behind counter       */
	movw  r26, r24
	adiw  r26, 1			/* X = matrix byte at offset 1   */
	ldi   r22, COUNTER_SIZE
.Ladd_counter:
	ld    r16, -Z			/* walk the counter downwards    */
	ld    r21, X
	eor   r21, r16
	st    X, r21
	adiw  r26, ROW_STRIDE		/* same column, next row         */
	dec   r22
	brne  .Ladd_counter

	/* ---- step 2: decrement counter ------------------------------ */
	/* Z is back at the lowest counter byte and r16 still holds it.  */
	subi  r16, 1
	st    Z+, r16
	ldi   r17, COUNTER_SIZE-1
.Ldec_counter:
	ld    r16, Z
	sbci  r16, 0			/* propagate the borrow          */
	st    Z+, r16
	dec   r17			/* dec leaves the carry flag alone */
	brne  .Ldec_counter

	/* ---- step 3: sbox substitution ------------------------------ */
	movw  r26, r24			/* X = first matrix byte         */
	adiw  r28, 1			/* Y points to stack memory      */
	movw  SBUF_PTR_LO, r28
	ldi   r20, lo8(twister_sbox)
	ldi   r21, hi8(twister_sbox)
	ldi   r19, STATE_SIZE
.Lsubstitute:
	ld    r0, X+
	movw  r30, r20			/* Z = twister_sbox + byte       */
	add   r30, r0
	adc   r31, r1
	lpm   r0, Z
	st    Y+, r0
	dec   r19
	brne  .Lsubstitute

	/* ---- step 4: mixing with the MDS coefficients --------------- */
	/* initial coefficients MDS0..MDS7 = 2, 1, 1, 5, 7, 8, 6, 1      */
	ldi   r18, 1
	mov   MDS1, r18
	mov   MDS2, r18
	mov   MDS7, r18
	ldi   r18, 2
	mov   MDS0, r18
	ldi   r18, 5
	mov   MDS3, r18
	ldi   r18, 6
	mov   MDS6, r18
	ldi   r18, 7
	mov   MDS4, r18
	ldi   r18, 8
	mov   MDS5, r18

	movw  STATE_PTR_LO, r24
	ldi   GF_RED_REG, GF_REDUCER	/* reducer for gf256mul          */
	ldi   r16, 0			/* r16 = outer counter 0..7      */

	/*
	 * NOTE(review): COL_IDX is reloaded only here, at the top of the
	 * outer loop. One pass of the inner loop advances it seven times
	 * (mod 8), so every inner pass starts one position lower than the
	 * previous one. Likewise the coefficients are rotated once per
	 * inner pass plus once more after the inner loop, i.e. a net single
	 * rotation per outer pass. Confirm both against the reference
	 * implementation.
	 */
.Lcolumn_loop:
	mov   COL_IDX, r16
	movw  r26, STATE_PTR_LO
	add   r26, r16			/* X = matrix byte [0][r16]      */
	adc   r27, r1
	ldi   r17, 0			/* r17 = inner counter 0..7      */

.Lrow_loop:
	mov   ROW_SHADOW, r17		/* value is not used afterwards  */

	/* first term: buffer row 0, COL_IDX not advanced; starts r0     */
	mov   r24, MDS0
	movw  r28, SBUF_PTR_LO
	add   r28, COL_IDX
	adc   r29, r1
	ld    r22, Y
	rcall gf256mul
	mov   r0, r24

	/* remaining terms: buffer rows 1..7, COL_IDX advanced each time */
	twister_mds_term MDS1, 1
	twister_mds_term MDS2, 2
	twister_mds_term MDS3, 3
	twister_mds_term MDS4, 4
	twister_mds_term MDS5, 5
	twister_mds_term MDS6, 6
	twister_mds_term MDS7, 7

	st    X, r0
	adiw  r26, ROW_STRIDE		/* same column, next row         */

	twister_mds_rotate
	cpi   r17, 7
	breq  .Lrow_done
	inc   r17
	rjmp  .Lrow_loop

.Lrow_done:
	twister_mds_rotate
	cpi   r16, 7
	breq  .Lfinish
	inc   r16
	rjmp  .Lcolumn_loop

.Lfinish:
	stack_free_large 64
	pop   r29
	pop   r28
	pop_range 4, 17
	ret

/*
 * gf256mul
 *
 * in:   r24 = first factor, r22 = second factor, r20 = reducer byte
 * out:  r24 = product
 * uses: r22 and r23 are destroyed; r20 is only read
 *
 * Shift-and-add multiplication: the first factor is consumed from its
 * least significant bit upwards. For every set bit the current second
 * factor is XORed into the product; between bits the second factor is
 * shifted left by one and, when a bit drops out at the top, XORed with
 * the reducer. The loop ends as soon as the remaining bits of the first
 * factor are all zero.
 */
gf256mul:
	mov   GF_A, r24
	clr   GF_P
.Lgf_next_bit:
	lsr   GF_A			/* C = current bit, Z = none left */
	breq  .Lgf_last_bit
	brcc  .Lgf_double
	eor   GF_P, GF_B
.Lgf_double:
	lsl   GF_B
	brcc  .Lgf_loop_back
	eor   GF_B, GF_RED_REG
.Lgf_loop_back:
	rjmp  .Lgf_next_bit
.Lgf_last_bit:
	brcc  .Lgf_done			/* handle the final bit shifted out */
	eor   GF_P, GF_B
.Lgf_done:
	ret