--- /dev/null
+/* serpent-sboxes-fast.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File: serpent-sboxes-fast.S
+ * Author: Daniel Otte
+ * Date: 2008-08-07
+ * License: GPLv3 or later
+ * Description: Implementation of the serpent sbox function.
+ *
+ */
+
+#include <avr/io.h>
+#include "avr-asm-macros.S"
+
+
+/*
+ * Forward S-boxes 0..7, one row of 16 entries per S-box.
+ * Every entry carries its 4-bit output value in both nibbles, so one
+ * lookup result can be masked with 0x0F or with 0xF0 (see sbox128x_fast).
+ * The table is read with lpm. serpent_sbox_inv_fast has to stay directly
+ * behind it: sbox128x_fast reaches the inverse boxes as
+ * serpent_sbox_fast + 128.
+ */
+serpent_sbox_fast:
+ .byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB, 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC /* box 0 */
+ .byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA, 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44 /* box 1 */
+ .byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF, 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22 /* box 2 */
+ .byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33, 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE /* box 3 */
+ .byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66, 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD /* box 4 */
+ .byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC, 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11 /* box 5 */
+ .byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB, 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00 /* box 6 */
+ .byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB, 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66 /* box 7 */
+
+/*
+ * Inverse S-boxes 0..7, one row of 16 entries per S-box, in the same
+ * format as serpent_sbox_fast (4-bit value repeated in both nibbles).
+ * sbox128x_fast addresses this table as serpent_sbox_fast + 128, so it
+ * must remain immediately after the 128-byte forward table.
+ */
+serpent_sbox_inv_fast:
+ .byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC, 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22 /* inverse box 0 */
+ .byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33, 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00 /* inverse box 1 */
+ .byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22, 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77 /* inverse box 2 */
+ .byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD, 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11 /* inverse box 3 */
+ .byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE, 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11 /* inverse box 4 */
+ .byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE, 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00 /* inverse box 5 */
+ .byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00, 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB /* inverse box 6 */
+ .byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88, 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22 /* inverse box 7 */
+
+
+/*
+ * void serpent_ip(uint32_t *i, uint8_t *o);
+ *
+ * Reads 16 bytes from i, viewed as four little-endian 32-bit words, and
+ * writes 16 bytes to o in which the words are interleaved bit by bit:
+ * output bit 4*n + j is bit n of input word j (j = 0..3, n = 0..31).
+ *
+ * The input is copied into r2..r17 by storing through Z = 0x0002, so the
+ * routine depends on the register file being visible in data space at
+ * addresses 0x00..0x1F.
+ *
+ * r2..r17 are saved and restored via push_range/pop_range (macros from
+ * avr-asm-macros.S, presumably pushing/popping r2..r17 -- confirm there).
+ * Clobbers r19, r20, r21, r24, r25, r26, r27, r30, r31.
+ */
+/*
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_ip
+serpent_ip:
+ push_range 2, 17
+ movw r26, r24 /* X = i */
+ ldi r24, 16 /* number of input bytes */
+ clr r31
+ ldi r30, 2 /* Z = 0x0002: data-space address of r2 */
+1:
+ ld r25, X+
+ st Z+, r25
+ dec r24
+ brne 1b
+ /* now the whole input is loaded in r2-r17 */
+ movw r26, r22 /* X = o */
+ ldi r21, 4 /* one pass per byte position inside the words */
+4:
+ ldi r20, 8 /* 8 bits in the current byte of each word */
+2:
+ lsr r2 /* lowest remaining bit of word 0 -> carry */
+ ror r19 /* carry enters r19 at bit 7 */
+ lsr r6 /* same for word 1 */
+ ror r19
+ lsr r10 /* word 2 */
+ ror r19
+ lsr r14 /* word 3 */
+ ror r19
+ sbrc r20, 0 /* count is odd on every second step: r19 holds 8 fresh bits */
+ st X+, r19
+ dec r20
+ brne 2b
+
+ /* move r3..r17 down to r2..r16 so the next byte of each word sits in r2/r6/r10/r14 */
+ ldi r20, 15
+ ldi r30, 2 /* r31 is still 0, so Z = 0x0002 again */
+3:
+ ldd r19, Z+1
+ st Z+, r19
+ dec r20
+ brne 3b
+
+ dec r21
+ brne 4b
+ pop_range 2, 17
+ ret
+
+/*
+ * void serpent_fp(uint32_t *i, uint8_t *o);
+ *
+ * Reads 16 bytes from i and writes 16 bytes to o, undoing the bit
+ * interleaving produced by serpent_ip: input bit 4*n + j becomes bit n of
+ * the j-th 4-byte group of the output (j = 0..3, n = 0..31).
+ *
+ * Clobbers r18..r24, r26, r27, r30, r31.
+ */
+/*
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_fp
+serpent_fp:
+ movw r30, r22 /* Z = o; r22/r23 are reused as accumulators below */
+ movw r26, r24 /* X = i */
+ ldi r18, 4 /* four byte positions per output group */
+.Lfp_lane:
+ ldi r19, 8 /* eight bits to collect per accumulator */
+.Lfp_bits:
+ sbrs r19, 0 /* even count: fetch the next input byte; odd count: keep its upper nibble */
+ ld r24, X+
+ lsr r24 /* bit for group 0 */
+ ror r20
+ lsr r24 /* bit for group 1 */
+ ror r21
+ lsr r24 /* bit for group 2 */
+ ror r22
+ lsr r24 /* bit for group 3 */
+ ror r23
+ dec r19
+ brne .Lfp_bits
+
+ /* same byte position in each of the four groups, 4 bytes apart */
+ std Z+4, r21
+ std Z+8, r22
+ std Z+12, r23
+ st Z+, r20
+
+ dec r18
+ brne .Lfp_lane
+ ret
+
+
+/*
+ * void inv_sbox128(void * w, uint8_t box)
+ *
+ * Entry point for the inverse S-boxes. Reduces box to 0..7, sets bit 3 and
+ * continues in sbox128x_fast. There the value is multiplied by 16 and added
+ * to the address of serpent_sbox_fast, so bit 3 contributes an offset of
+ * 128 bytes, which is where serpent_sbox_inv_fast begins.
+ * w arrives in r24:r25 and box in r22, as for sbox128.
+ */
+.global inv_sbox128
+inv_sbox128:
+ andi r22, 0x07 /* box &= 7 */
+ ori r22, 0x08 /* bit 3 selects the inverse table */
+ rjmp sbox128x_fast
+
+/*
+ * void sbox128(void * w, uint8_t box);
+ *
+ * Substitutes the 16-byte block at w in place:
+ *   1. serpent_ip writes the permuted block into a 16-byte stack buffer,
+ *   2. every nibble of the buffer is replaced through the selected S-box,
+ *   3. serpent_fp writes the buffer back to w.
+ * sbox128 selects forward S-box (box & 7). sbox128x_fast is the shared
+ * body; it expects r22 = table index 0..15, where 8..15 address the
+ * inverse boxes (see inv_sbox128).
+ *
+ * Relies on r1 == 0 for the carry propagation (adc rN, r1).
+ */
+/*
+ * param w is passed in r24:r25
+ * param box is passed in r22
+ */
+.global sbox128
+sbox128:
+ andi r22, 0x07
+ /* falls through into sbox128x_fast */
+sbox128x_fast:
+ stack_alloc 16 /* macro from avr-asm-macros.S; presumably leaves the new SP in Z -- confirm */
+ adiw r30, 1 /* first byte of the buffer is one above SP */
+ push_ r24, r25, r22, r30, r31 /* keep w, table index and buffer address */
+ movw r22, r30 /* Z points to the stack buffer */
+ rcall serpent_ip /* serpent_ip(w, buffer) */
+ pop_ r27, r26, r22 /* X = buffer, r22 = table index */
+ ldi r24, lo8(serpent_sbox_fast)
+ ldi r25, hi8(serpent_sbox_fast)
+ swap r22 /* r22 *= 16 (index is below 16) */
+ add r24, r22
+ adc r25, r1
+ /* X points to the buffer, r24:r25 to the selected 16-entry S-box */
+ ldi r22, 16 /* bytes left to substitute */
+.Lsbox128_byte:
+ ld r18, X
+ mov r20, r18
+ /* low nibble */
+ andi r18, 0x0f
+ movw r30, r24
+ add r30, r18
+ adc r31, r1
+ lpm r19, Z
+ andi r19, 0x0F /* table entries repeat the value in both nibbles */
+ /* high nibble */
+ swap r20
+ andi r20, 0x0f
+ movw r30, r24
+ add r30, r20
+ adc r31, r1
+ lpm r21, Z
+ andi r21, 0xF0
+ /* recombine and write back */
+ or r19, r21
+ st X+, r19
+ dec r22
+ brne .Lsbox128_byte
+
+ pop_ r23, r22 /* r22:r23 = w, saved at entry */
+ movw r24, r26
+ sbiw r24, 16 /* X ran 16 bytes past the buffer start */
+
+ rcall serpent_fp /* serpent_fp(buffer, w) */
+
+ stack_free 16
+ ret
+
+
+
+
+
+
+
+
+