/* serpent-sboxes-fast.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* File: serpent-sboxes-fast.S
* Author: Daniel Otte
* Date: 2008-08-07
* License: GPLv3 or later
* Description: Implementation of the serpent sbox function.
*
*/
#include
#include "avr-asm-macros.S"
/*
 * Forward S-box lookup table: eight boxes of 16 entries each (128 bytes),
 * stored in box order 0..7, two .byte lines per box.
 *
 * Every entry holds the same 4-bit value in both nibbles (e.g. 0x33), so
 * sbox128x_fast can mask the low nibble out of one lookup and the high
 * nibble out of another and OR them together without any shifting.
 *
 * sbox128x_fast reads the table with lpm at serpent_sbox_fast + 16 * index,
 * and inv_sbox128 selects indices 8..15, so serpent_sbox_inv_fast has to
 * follow this table directly.
 */
serpent_sbox_fast:
/* box 0 */
.byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB
.byte 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC
/* box 1 */
.byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA
.byte 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44
/* box 2 */
.byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF
.byte 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22
/* box 3 */
.byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33
.byte 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE
/* box 4 */
.byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66
.byte 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD
/* box 5 */
.byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC
.byte 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11
/* box 6 */
.byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB
.byte 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00
/* box 7 */
.byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB
.byte 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66
/*
 * Inverse S-box lookup table: eight boxes of 16 entries each (128 bytes),
 * stored in box order 0..7, two .byte lines per box. Box n here is the
 * inverse permutation of box n in serpent_sbox_fast.
 *
 * As in the forward table, every entry repeats its 4-bit value in both
 * nibbles.
 *
 * This label is not referenced by the code in this file: inv_sbox128
 * reaches these boxes as table indices 8..15 relative to serpent_sbox_fast,
 * so this table must stay directly behind the 128-byte forward table.
 */
serpent_sbox_inv_fast:
/* inverse box 0 */
.byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC
.byte 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22
/* inverse box 1 */
.byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33
.byte 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00
/* inverse box 2 */
.byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22
.byte 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77
/* inverse box 3 */
.byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD
.byte 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11
/* inverse box 4 */
.byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE
.byte 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11
/* inverse box 5 */
.byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE
.byte 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00
/* inverse box 6 */
.byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00
.byte 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB
/* inverse box 7 */
.byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88
.byte 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22
/*
 * void serpent_ip(uint32_t *i, uint8_t *o);
 *
 * Regroups the 16 input bytes into 32 nibbles. For every byte index
 * b = 0..3 and bit position j = 0..7 one nibble is formed from bit j of
 * i[b] (nibble bit 0), i[b+4] (bit 1), i[b+8] (bit 2) and i[b+12] (bit 3).
 * Two consecutive nibbles share an output byte, the even-j nibble in the
 * low half: the nibble for (b, j) lands in o[4*b + j/2].
 *
 * In:    r24:r25 = i (16 bytes, read)
 *        r22:r23 = o (16 bytes, written)
 * Clobb: r19, r20, r21, r24, r25, r26, r27, r30, r31, SREG flags
 *
 * The complete input is read before the first byte of o is written.
 *
 * NOTE(review): the input is staged in r2..r17 by storing through Z to data
 * addresses 0x02..0x11, i.e. the code expects the register file to be
 * visible in data space - confirm for the target core. r2..r17 are
 * bracketed by push_range/pop_range (macros from avr-asm-macros.S,
 * presumably saving and restoring exactly these registers).
 */
.global serpent_ip
serpent_ip:
	push_range 2, 17
	movw	r26, r24		/* X = i */
	ldi	r30, 2
	clr	r31			/* Z = 0x0002, data address used for r2 */
	ldi	r24, 16			/* bytes left to stage */
.Lserpent_ip_stage:
	ld	r25, X+
	st	Z+, r25
	dec	r24
	brne	.Lserpent_ip_stage
	/* the whole input now sits in r2..r17 (one 4-byte word per 4 registers) */
	movw	r26, r22		/* X = o */
	ldi	r21, 4			/* byte columns left */
.Lserpent_ip_column:
	ldi	r20, 8			/* bit positions left in this column */
.Lserpent_ip_bit:
	/* move the current lowest bit of each word's column byte into r19 */
	lsr	r2
	ror	r19
	lsr	r6
	ror	r19
	lsr	r10
	ror	r19
	lsr	r14
	ror	r19
	/* r19 is complete after two nibbles, i.e. whenever the counter is odd */
	sbrc	r20, 0
	st	X+, r19
	dec	r20
	brne	.Lserpent_ip_bit
	/* slide r3..r17 down by one register so the next column byte of every
	   word ends up in r2 / r6 / r10 / r14 (r31 is still zero here) */
	ldi	r30, 2
	ldi	r20, 15
.Lserpent_ip_slide:
	ldd	r19, Z+1
	st	Z+, r19
	dec	r20
	brne	.Lserpent_ip_slide
	dec	r21
	brne	.Lserpent_ip_column
	pop_range 2, 17
	ret
/*
 * void serpent_fp(uint32_t *i, uint8_t *o);
 *
 * Undoes the regrouping done by serpent_ip. The input is consumed in four
 * groups of four bytes (eight nibbles, low nibble of each byte first).
 * For group t = 0..3 and nibble number j = 0..7 within the group:
 *   nibble bit 0 -> bit j of o[t]
 *   nibble bit 1 -> bit j of o[t+4]
 *   nibble bit 2 -> bit j of o[t+8]
 *   nibble bit 3 -> bit j of o[t+12]
 *
 * In:    r24:r25 = i (16 bytes, read)
 *        r22:r23 = o (16 bytes, written)
 * Clobb: r18, r19, r20, r21, r22, r23, r24, r26, r27, r30, r31, SREG flags
 *
 * Output bytes are stored while later input bytes are still unread, so the
 * two buffers must not overlap.
 */
.global serpent_fp
serpent_fp:
	movw	r30, r22		/* Z = o */
	movw	r26, r24		/* X = i */
	ldi	r18, 4			/* groups left */
.Lserpent_fp_group:
	ldi	r19, 8			/* nibbles left in this group */
.Lserpent_fp_nibble:
	/* fetch a fresh input byte on every even count, i.e. every 2nd nibble */
	sbrs	r19, 0
	ld	r24, X+
	/* deal the four bits of the current nibble out to r20..r23 */
	lsr	r24
	ror	r20
	lsr	r24
	ror	r21
	lsr	r24
	ror	r22
	lsr	r24
	ror	r23
	dec	r19
	brne	.Lserpent_fp_nibble
	/* r20..r23 now hold byte t of the four output words */
	st	Z+, r20			/* o[t]; Z advances to t+1 */
	std	Z+3, r21		/* o[t+4] */
	std	Z+7, r22		/* o[t+8] */
	std	Z+11, r23		/* o[t+12] */
	dec	r18
	brne	.Lserpent_fp_group
	ret
/*
 * void inv_sbox128(void *w, uint8_t box);
 *
 * Inverse counterpart of sbox128: reduces box to 0..7, then sets bit 3 so
 * the shared body sbox128x_fast is entered with table index 8..15 and
 * therefore looks up the boxes stored 128 bytes behind serpent_sbox_fast.
 *
 * In:    r24:r25 = w   (passed through untouched to sbox128x_fast)
 *        r22     = box (only the low three bits are used)
 *
 * Does not return here; sbox128x_fast returns to the caller.
 */
.global inv_sbox128
inv_sbox128:
	andi	r22, 0x07		/* box number 0..7 */
	sbr	r22, 0x08		/* +8: select the inverse half of the table */
	rjmp	sbox128x_fast
/*
 * void sbox128(void *w, uint8_t box);
 *
 * Substitutes the 16-byte block at w in place using box number (box & 7):
 *   1. serpent_ip regroups w into a 16-byte scratch buffer on the stack,
 *   2. both nibbles of every scratch byte are replaced via table lookup,
 *   3. serpent_fp writes the regrouped result back to w.
 *
 * In:    r24:r25 = w   (16 bytes, read and written)
 *        r22     = box (only the low three bits are used)
 *
 * sbox128x_fast is the shared body, also entered from inv_sbox128. It
 * expects r24:r25 = w and r22 = table index 0..15, where index n selects
 * the 16 entries at serpent_sbox_fast + 16*n.
 *
 * NOTE(review): relies on stack_alloc (avr-asm-macros.S) leaving the new
 * stack pointer in r30:r31, and on r1 being zero - confirm both.
 */
.global sbox128
sbox128:
	andi	r22, 0x07		/* box number 0..7 */
sbox128x_fast:
	stack_alloc 16
	adiw	r30, 1			/* Z = first byte of the stack buffer */
	push_ r24, r25, r22, r30, r31	/* keep w, table index and buffer address */
	movw	r22, r30		/* serpent_ip(w, buffer) */
	rcall	serpent_ip
	pop_ r27, r26, r22		/* X = buffer (r31->r27, r30->r26), r22 = index */
	ldi	r24, lo8(serpent_sbox_fast)
	ldi	r25, hi8(serpent_sbox_fast)
	swap	r22			/* index 0..15 -> byte offset index * 16 */
	add	r24, r22
	adc	r25, r1
	/* X walks the buffer, r24:r25 is the base of the selected box */
	ldi	r22, 16			/* buffer bytes left */
.Lsbox128_next_byte:
	ld	r18, X
	mov	r20, r18
	/* look up the low nibble */
	andi	r18, 0x0f
	movw	r30, r24
	add	r30, r18
	adc	r31, r1
	lpm	r19, Z
	/* look up the high nibble */
	swap	r20
	andi	r20, 0x0f
	movw	r30, r24
	add	r30, r20
	adc	r31, r1
	lpm	r21, Z
	/* table entries repeat their value in both nibbles: keep the low half
	   of the first lookup and the high half of the second */
	andi	r19, 0x0F
	andi	r21, 0xF0
	or	r19, r21
	st	X+, r19
	dec	r22
	brne	.Lsbox128_next_byte
	pop_ r23, r22			/* r22:r23 = w (pushed as r24, r25) */
	movw	r24, r26
	sbiw	r24, 16			/* X ran 16 bytes forward; back to buffer start */
	rcall	serpent_fp		/* serpent_fp(buffer, w) */
	stack_free 16
	ret