/* serpent-sboxes-fast.S */
/*
 * Provenance: recovered from a new-file git diff of
 * serpent/serpent-sboxes-fast.S in avr-crypto-lib.git:
 * https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=serpent%2Fserpent-sboxes-fast.S;fp=serpent%2Fserpent-sboxes-fast.S;h=9242272090267a6958da7d1358fe708339d2dc5c;hp=0000000000000000000000000000000000000000;hb=d32eba56ce10ea6b9eff123b50d9842673b38f2b;hpb=8f855d283a31a468ea014774c4723a8b77b81644
 */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
 * File:        serpent-sboxes-fast.S
 * Author:      Daniel Otte
 * Date:        2008-08-07
 * License:     GPLv3 or later
 * Description: Implementation of the serpent sbox function.
 *
 */

/*
 * NOTE(review): the system header name was lost when this file was extracted
 * (a bare "#include" remained). <avr/io.h> is the header that provides the
 * SFR definitions the stack macros presumably need - confirm against the
 * upstream repository.
 */
#include <avr/io.h>
#include "avr-asm-macros.S"

/*
 * Forward S-box table: 8 boxes of 16 entries each (128 bytes).
 * Every entry stores its 4-bit result in BOTH nibbles, so the lookup code can
 * mask with 0x0F for the low nibble and with 0xF0 for the high nibble without
 * an extra swap.
 *
 * The inverse table must stay directly behind this one: inv_sbox128 selects
 * it by using box indices 8..15 relative to serpent_sbox_fast.
 */
serpent_sbox_fast:
    .byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB
    .byte 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC
    .byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA
    .byte 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44
    .byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF
    .byte 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22
    .byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33
    .byte 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE
    .byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66
    .byte 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD
    .byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC
    .byte 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11
    .byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB
    .byte 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00
    .byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB
    .byte 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66

/*
 * Inverse S-box table, same layout as serpent_sbox_fast (8 boxes x 16
 * entries, result replicated in both nibbles).
 */
serpent_sbox_inv_fast:
    .byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC
    .byte 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22
    .byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33
    .byte 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00
    .byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22
    .byte 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77
    .byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD
    .byte 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11
    .byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE
    .byte 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11
    .byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE
    .byte 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00
    .byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00
    .byte 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB
    .byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88
    .byte 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22


/*
 * void serpent_ip(uint32_t *i, uint8_t *o)
 *
 * Bit permutation of a 128-bit block. Treating every four input bytes as a
 * little-endian 32-bit word, output bit n (n = 0..127, bit 0 of o[0] first)
 * is bit (n / 4) of input word (n % 4).
 *
 * param i is given in r24:r25
 * param o is given in r22:r23
 *
 * The 16 input bytes are copied into r2..r17 by storing through Z = 0x0002,
 * i.e. the routine relies on the register file being visible in data space
 * starting at address 0.
 * push_range/pop_range come from avr-asm-macros.S; they are presumably used
 * to save and restore r2..r17 around that use - confirm there.
 */
.global serpent_ip
serpent_ip:
    push_range 2, 17
    movw r26, r24           /* X = i */
    ldi  r24, 16            /* byte counter */
    clr  r31
    ldi  r30, 2             /* Z = 0x0002 -> r2 in the register file */
1:
    ld   r25, X+
    st   Z+, r25
    dec  r24
    brne 1b
    /* now the whole input is loaded in r2-r17 */
    movw r26, r22           /* X = o */
    ldi  r21, 4             /* outer passes: one per byte of each word */
4:
    ldi  r20, 8             /* 8 bits per source byte */
2:
    /* move bit 0 of the current byte of words 0..3 into r19 (top down) */
    lsr  r2
    ror  r19
    lsr  r6
    ror  r19
    lsr  r10
    ror  r19
    lsr  r14
    ror  r19
    /* r19 is complete after every second round, i.e. when r20 is odd */
    sbrc r20, 0
    st   X+, r19
    dec  r20
    brne 2b

    /* shift r3..r17 down into r2..r16 so the next byte of each word
       sits in r2/r6/r10/r14 */
    ldi  r20, 15
    ldi  r30, 2
3:
    ldd  r19, Z+1
    st   Z+, r19
    dec  r20
    brne 3b

    dec  r21
    brne 4b
    pop_range 2, 17
    ret

/*
 * void serpent_fp(uint32_t *i, uint8_t *o)
 *
 * Inverse of serpent_ip: input bit (4*m + k) becomes bit m of output word k
 * (k = 0..3), with each output word stored as four little-endian bytes.
 *
 * param i is given in r24:r25
 * param o is given in r22:r23
 *
 * r20..r23 collect one byte for each of the four output words; r22:r23 (o)
 * is therefore copied to Z before it gets overwritten.
 */
.global serpent_fp
serpent_fp:
    movw r26, r24           /* X = i */
    movw r30, r22           /* Z = o */
    ldi  r18, 4             /* outer passes: one output byte per word each */
1:
    ldi  r19, 8             /* 8 rounds of 4 bits = 4 input bytes */
2:
    /* fetch a fresh input byte on every second round (r19 even) */
    sbrs r19, 0
    ld   r24, X+
    /* distribute the low four bits of r24 to the four output words */
    lsr  r24
    ror  r20
    lsr  r24
    ror  r21
    lsr  r24
    ror  r22
    lsr  r24
    ror  r23
    dec  r19
    brne 2b

    /* Z was post-incremented by the first store, hence offsets 3/7/11
       address bytes 4/8/12 relative to the old Z */
    st   Z+, r20
    std  Z+3, r21
    std  Z+7, r22
    std  Z+11, r23

    dec  r18
    brne 1b
    ret


/*
 * void inv_sbox128(void *w, uint8_t box)
 *
 * Same as sbox128 but with the inverse S-box: the box number is reduced to
 * 0..7 and bit 3 is set, which selects entries 8..15 counted from
 * serpent_sbox_fast, i.e. the boxes of serpent_sbox_inv_fast.
 *
 * param w is passed in r24:r25
 * param box is passed in r22
 */
.global inv_sbox128
inv_sbox128:
    andi r22, 0x07
    ori  r22, 0x08
    rjmp sbox128x_fast

/*
 * void sbox128(void *w, uint8_t box);
 *
 * Applies S-box number (box & 7) to the 128-bit block at w, in place:
 * serpent_ip into a 16-byte stack buffer, nibble-wise table lookup on the
 * buffer, serpent_fp back into w.
 *
 * param w is passed in r24:r25
 * param box is passed in r22
 */
.global sbox128
sbox128:
    andi r22, 0x07

/*
 * Common body; expects r24:r25 = w and r22 = table index 0..15.
 * stack_alloc, stack_free, push_ and pop_ come from avr-asm-macros.S.
 * NOTE(review): stack_alloc presumably leaves the new stack pointer in Z so
 * that Z+1 is the first buffer byte, and push_/pop_ presumably expand to
 * plain push/pop sequences in argument order - confirm against the macros.
 * r1 is assumed to hold zero (avr-gcc convention).
 */
sbox128x_fast:
    stack_alloc 16
    adiw r30, 1
    push_ r24, r25, r22, r30, r31
    movw r22, r30           /* Z points to the stack buffer */
    rcall serpent_ip        /* serpent_ip(w, buffer) */
    pop_ r27, r26, r22      /* X = buffer, r22 = box; w stays on the stack */
    ldi  r25, hi8(serpent_sbox_fast)
    ldi  r24, lo8(serpent_sbox_fast)
    swap r22                /* r22 *= 16 (r22 is at most 15) */
    add  r24, r22
    adc  r25, r1
    /* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
    ldi  r22, 16            /* bytes to process */
1:
    /* look up the low nibble */
    movw r30, r24
    ld   r18, X
    mov  r20, r18
    andi r18, 0x0f
    add  r30, r18
    adc  r31, r1
    lpm  r19, Z
    /* look up the high nibble */
    swap r20
    andi r20, 0x0f
    movw r30, r24
    add  r30, r20
    adc  r31, r1
    lpm  r21, Z
    /* table entries carry the result in both nibbles: keep the low nibble of
       the first lookup and the high nibble of the second */
    andi r19, 0x0F
    andi r21, 0xF0
    or   r19, r21
    st   X+, r19
    dec  r22
    brne 1b

    pop_ r23, r22           /* r22:r23 = w (pushed as r24, r25) */
    movw r24, r26
    sbiw r24, 16            /* X ran 16 bytes past the buffer start */

    rcall serpent_fp        /* serpent_fp(buffer, w) */

    stack_free 16
    ret