/* serpent-sboxes-fast.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * File:        serpent-sboxes-fast.S
 * Author:      Daniel Otte
 * Date:        2008-08-07
 * License:     GPLv3 or later
 * Description: Implementation of the serpent sbox function.
 *
 * Syntax/target: GNU as for AVR, run through the C preprocessor (.S).
 * NOTE(review): the code uses r1 as a constant zero in every 'adc' and
 * only saves r2-r17, which matches the avr-gcc calling convention -
 * confirm that all callers follow it.
 */

/* NOTE(review): the header name of this include was lost in the copy under
 * review; <avr/io.h> is what the SREG/SPL/SPH based stack macros of
 * avr-asm-macros.S are expected to need - confirm against the upstream file.
 */
#include <avr/io.h>
#include "avr-asm-macros.S"

/*
 * Forward S-boxes S0..S7, 16 entries each (128 bytes), read with 'lpm'.
 * Every entry carries its 4-bit output in BOTH nibbles, so the lookup code
 * can mask the result with 0x0F (low nibble) or 0xF0 (high nibble) and needs
 * no extra 'swap'.
 * The inverse table must follow directly: the lookup code addresses both
 * tables as one array of 16 boxes with base serpent_sbox_fast.
 */
serpent_sbox_fast:
 .byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB   /* S0 */
 .byte 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC
 .byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA   /* S1 */
 .byte 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44
 .byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF   /* S2 */
 .byte 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22
 .byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33   /* S3 */
 .byte 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE
 .byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66   /* S4 */
 .byte 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD
 .byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC   /* S5 */
 .byte 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11
 .byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB   /* S6 */
 .byte 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00
 .byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB   /* S7 */
 .byte 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66

/*
 * Inverse S-boxes, same layout as above (boxes 8..15 of the combined table).
 */
serpent_sbox_inv_fast:
 .byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC   /* inverse S0 */
 .byte 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22
 .byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33   /* inverse S1 */
 .byte 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00
 .byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22   /* inverse S2 */
 .byte 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77
 .byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD   /* inverse S3 */
 .byte 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11
 .byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE   /* inverse S4 */
 .byte 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11
 .byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE   /* inverse S5 */
 .byte 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00
 .byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00   /* inverse S6 */
 .byte 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB
 .byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88   /* inverse S7 */
 .byte 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22

/*
 * void serpent_ip(uint32_t *i, uint8_t *o){
 *
 * Bit permutation from four 32-bit words to 32 nibbles: output nibble n
 * (n = 0..31, two nibbles per output byte, even n in the low nibble) holds
 * bit n of word 0..3 in its bits 0..3.
 */
/*
 * param i is given in r24:r25 (16 bytes, read)
 * param o is given in r22:r23 (16 bytes, written)
 *
 * Clobbers: r19, r20, r21, r24, r25, X (r26:r27), Z (r30:r31), flags.
 * r2-r17 are used as the working copy and are wrapped in
 * push_range/pop_range (macros from avr-asm-macros.S).
 *
 * The input is copied into r2..r17 by storing through Z = 0x0002, i.e. the
 * routine relies on the register file being visible in data space starting
 * at address 0.
 * NOTE(review): that mapping does not exist on every AVR core - verify for
 * the targeted device.
 */
.global serpent_ip
serpent_ip:
	push_range 2, 17
	movw r26, r24           /* X = i */
	ldi r24, 16             /* byte counter */
	clr r31
	ldi r30, 2              /* Z = data address of r2 */
1:
	ld r25, X+
	st Z+, r25
	dec r24
	brne 1b
	/* now the whole input is loaded in r2-r17 */
	movw r26, r22           /* X = o */
	ldi r21, 4              /* 4 passes, one per byte position of the words */
4:
	ldi r20, 8              /* 8 bits per byte position */
2:
	/* move the lowest bit of each word into r19; after two iterations
	 * r19 holds two complete nibbles */
	lsr r2
	ror r19
	lsr r6
	ror r19
	lsr r10
	ror r19
	lsr r14
	ror r19
	sbrc r20, 0             /* odd count: second nibble done, emit the byte */
	st X+, r19
	dec r20
	brne 2b

	/* move r3..r17 down to r2..r16 so that the next byte of every word
	 * sits in r2/r6/r10/r14 */
	ldi r20, 15
	ldi r30, 2              /* Z = data address of r2 again (r31 is still 0) */
3:
	ldd r19, Z+1
	st Z+, r19
	dec r20
	brne 3b

	dec r21
	brne 4b
	pop_range 2, 17
	ret

/*
 * void serpent_fp(uint32_t *i, uint8_t *o){
 *
 * Counterpart of serpent_ip: bit k of input nibble n becomes bit n of
 * output word k.
 */
/*
 * param i is given in r24:r25 (16 bytes, read)
 * param o is given in r22:r23 (16 bytes, written)
 *
 * Clobbers: r18-r25, X (r26:r27), Z (r30:r31), flags.
 */
.global serpent_fp
serpent_fp:
	movw r26, r24           /* X = i */
	movw r30, r22           /* Z = o */
	ldi r18, 4              /* 4 passes, each yields one byte of every word */
1:
	ldi r19, 8              /* 8 nibbles = 4 input bytes per pass */
2:
	sbrs r19, 0             /* even count: fetch the next input byte */
	ld r24, X+
3:
	/* distribute the four bits of the current nibble over r20..r23 */
	lsr r24
	ror r20
	lsr r24
	ror r21
	lsr r24
	ror r22
	lsr r24
	ror r23
	dec r19
	brne 2b

	/* Z is post-incremented by the first store, hence the offsets 3/7/11
	 * address the bytes 4/8/12 behind the byte just written */
	st Z+, r20
	std Z+3, r21
	std Z+7, r22
	std Z+11, r23
	dec r18
	brne 1b
	ret

/*
 * void inv_sbox128(void * w, uint8_t box)
 *
 * Same as sbox128 but using the inverse S-box (box & 7).
 */
/*
 * param w is passed in r24:r25
 * param box is passed in r22
 */
.global inv_sbox128
inv_sbox128:
	andi r22, 0x07
	ori r22, 0x08           /* boxes 8..15 of the combined table are the inverses */
	rjmp sbox128x_fast

/*
 * void sbox128(void * w, uint8_t box);
 *
 * Applies S-box (box & 7) to the 16-byte block at w, in place:
 * serpent_ip into a 16-byte stack buffer, nibble-wise substitution of the
 * buffer, serpent_fp back into w.
 */
/*
 * param w is passed in r24:r25
 * param box is passed in r22
 */
.global sbox128
sbox128:
	andi r22, 0x07
/* common tail; expects r22 = table index 0..15 (0..7 forward, 8..15 inverse) */
sbox128x_fast:
	/* NOTE(review): relies on stack_alloc (avr-asm-macros.S) leaving the
	 * new stack pointer in Z - confirm against the macro definition */
	stack_alloc 16
	adiw r30, 1             /* SP points below the buffer, Z to its first byte */
	push_ r24, r25, r22, r30, r31
	movw r22, r30 /* Z points to the stack buffer */
	rcall serpent_ip        /* serpent_ip(w, buffer) */
	pop_ r27, r26, r22      /* X = buffer, r22 = box; w stays on the stack */
	ldi r25, hi8(serpent_sbox_fast)
	ldi r24, lo8(serpent_sbox_fast)
	swap r22 /* r22 *= 16 */ /* valid because r22 <= 0x0F here */
	add r24, r22
	adc r25, r1             /* r1 is used as constant zero */
	/* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
	ldi r22, 16             /* bytes left to substitute */
1:
	movw r30, r24
	ld r18, X
	mov r20, r18
	andi r18, 0x0f          /* low nibble selects the table entry */
	add r30, r18
	adc r31, r1
	lpm r19, Z
2:
	swap r20
	andi r20, 0x0f          /* high nibble selects the table entry */
	movw r30, r24
	add r30, r20
	adc r31, r1
	lpm r21, Z
3:
	/* entries hold the result in both nibbles, so masking is sufficient */
	andi r19, 0x0F
	andi r21, 0xF0
	or r19, r21
	st X+, r19
	dec r22
	brne 1b

	pop_ r23, r22           /* r22:r23 = w (pushed as r24, r25 above) */
	movw r24, r26
	sbiw r24, 16            /* r24:r25 = start of the stack buffer */
	rcall serpent_fp        /* serpent_fp(buffer, w) */
	stack_free 16
	ret