From 0f66a12e93ed43904c30810ac33c79c87befafe7 Mon Sep 17 00:00:00 2001 From: bg Date: Sun, 10 Aug 2008 23:19:11 +0000 Subject: [PATCH] even more serpent in asm --- mkfiles/serpent_asm_small.mk | 2 +- serpent-asm.S | 591 +++++++++++++++++++++++++++++++++++ serpent-sboxes-fast.S | 2 +- serpent-sboxes-small.S | 2 +- serpent.c | 24 +- shabea.h | 16 +- 6 files changed, 623 insertions(+), 14 deletions(-) create mode 100644 serpent-asm.S diff --git a/mkfiles/serpent_asm_small.mk b/mkfiles/serpent_asm_small.mk index c37017b..835dfee 100644 --- a/mkfiles/serpent_asm_small.mk +++ b/mkfiles/serpent_asm_small.mk @@ -5,7 +5,7 @@ ALGO_NAME := SERPENT_ASM_SMALL BLOCK_CIPHERS += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := serpent.o serpent-sboxes-small.o memxor.o +$(ALGO_NAME)_OBJ := serpent.o serpent-asm.o serpent-sboxes-small.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o debug.o uart.o serial-tools.o \ nessie_bc_test.o nessie_common.o cli.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/serpent-asm.S b/serpent-asm.S new file mode 100644 index 0000000..a16f6af --- /dev/null +++ b/serpent-asm.S @@ -0,0 +1,591 @@ +/* serpent_asm.S */ +/* + This file is part of the Crypto-avr-lib/microcrypt-lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + * File: serpent_sboxes.S + * Author: Daniel Otte + * Date: 2008-08-07 + * License: GPLv3 or later + * Description: Implementation of the serpent sbox function. + * + */ + +#include + +/******************************************************************************* +* MACRO SECTION * +*******************************************************************************/ + +.macro push_ p1:req, p2:vararg + push \p1 +.ifnb \p2 + push_ \p2 +.endif +.endm + +.macro pop_ p1:req, p2:vararg + pop \p1 +.ifnb \p2 + pop_ \p2 +.endif +.endm + +.macro push_range from:req, to:req + push \from +.if \to-\from + push_range "(\from+1)",\to +.endif +.endm + +.macro pop_range from:req, to:req + pop \to +.if \to-\from + pop_range \from,"(\to-1)" +.endif +.endm + +.macro stack_alloc size:req, reg1=r30, reg2=r31 + in r0, _SFR_IO_ADDR(SREG) + cli + in \reg1, _SFR_IO_ADDR(SPL) + in \reg2, _SFR_IO_ADDR(SPH) + sbiw \reg1, \size + out _SFR_IO_ADDR(SPH), \reg2 + out _SFR_IO_ADDR(SPL), \reg1 + out _SFR_IO_ADDR(SREG), r0 +.endm + +.macro stack_free size:req, reg1=r30, reg2=r31 + in r0, _SFR_IO_ADDR(SREG) + cli + in \reg1, _SFR_IO_ADDR(SPL) + in \reg2, _SFR_IO_ADDR(SPH) + adiw \reg1, \size + out _SFR_IO_ADDR(SPH), \reg2 + out _SFR_IO_ADDR(SPL), \reg1 + out _SFR_IO_ADDR(SREG), r0 +.endm + +/******************************************************************************* +* END of MACRO SECTION * +*******************************************************************************/ + +/* +static void serpent_lt(uint8_t *b){ + X0 = rotl32(X0, 13); + X2 = rotl32(X2, 3); + X1 ^= X0 ^ X2; + X3 ^= X2 ^ (X0 << 3); + X1 = rotl32(X1, 1); + X3 = rotl32(X3, 7); + X0 ^= X1 ^ X3; + X2 ^= X3 ^ (X1 << 7); + X0 = rotl32(X0, 5); + X2 = rotr32(X2, 10); +} +*/ + +#if 0 +A0 = 4 +A1 = 5 +A2 = 6 +A3 = 7 +B0 = 8 +B1 = 9 +B2 = 10 +B3 = 11 +C0 = 12 +C1 = 13 +C2 = 14 +C3 = 15 +D0 = 16 +D1 = 17 +D2 = 18 +D3 = 19 +T0 = 20 +T1 = 21 +T2 = 22 +T3 = 23 + +.global serpent_lt +serpent_lt: + push_range 4, 17 + movw r26, r24 + ld A2, X+ + ld A3, X+ + ld A0, X+ + ld A1, X+ + ldi r20, 3 + mov r0, A0 +1: + lsr r0 + ror A3 + ror A2 + ror A1 + ror A0 + dec r20 + brne 1b + ld B0, X+ + ld B1, X+ + ld B2, X+ + ld B3, X+ + + ld C2, X+ + ld C3, X+ + ld C0, X+ + ld C1, X+ + ldi r20, 3 + mov r0, C0 +1: + lsr r0 + ror C3 + ror C2 + ror C1 + ror C0 + dec r20 + brne 1b + + ld D0, X+ + ld D1, X+ + ld D2, X+ + ld D3, X+ + /* X1 ^= X0 ^ X2; */ + eor B0, A0 + eor B0, C0 + eor B1, A1 + eor B1, C1 + eor B2, A2 + eor B2, C2 + eor B3, A3 + eor B3, C3 + /* X3 ^= X2 ^ (X0 << 3); */ + mov T0, A0 + mov T1, A1 + mov T2, A2 + mov T3, A3 + ldi r24, 3 +1: + lsl T0 + rol T1 + rol T2 + rol T3 + dec r24 + brne 1b + eor C0, B0 + eor C0, T0 + eor C1, B1 + eor C1, T1 + eor C2, B2 + eor C2, T2 + eor C3, B3 + eor C3, T3 + /* X1 = rotl32(X1, 1); */ + mov r0, B3 + lsl r0 + rol B0 + rol B1 + rol B2 + rol B3 + /* X3 = rotl32(X3, 7); */ + mov r0, D3 + mov D3, D2 + mov D2, D1 + mov D1, D0 + mov D0, r0 + lsr r0 + ror D3 + ror D2 + ror D1 + ror D0 + /* X0 ^= X1 ^ X3; */ + eor A0, B0 + eor A0, D0 + eor A1, B1 + eor A1, D1 + eor A2, B2 + eor A2, D2 + eor A3, B3 + eor A3, D3 + /* X2 ^= X3 ^ (X1 << 7); */ + mov T1, B0 + mov T2, B1 + mov T3, B2 + clr T0 + mov r0, B3 + lsr r0 + ror T2 + ror T1 + ror T0 + eor C0, D0 + eor C0, T0 + eor C1, D1 + eor C1, T1 + eor C2, D2 + eor C2, T2 + eor C3, D3 + eor C3, T3 + /* X0 = rotl32(X0, 5); */ + ldi r24, 5 + mov r0, A3 +1: + lsl r0 + rol A0 + rol A1 + rol A2 + rol A3 + dec r24 + brne 1b + /* X2 = rotr32(X2, 10); */ + mov r0, C0 + mov C0, C1 + mov C1, C2 + mov C2, C3 + mov C3, r0 + ldi r24, 2 +1: + lsr r0 + ror C2 + ror C1 + ror C0 + ror C3 + dec r24 + brne 1b + + clr r31 + ldi r30, D3+1 + ldi r24, 16 +1: + ld r0, -Z + st -X, r0 + dec r24 + brne 1b + + pop_range 4, 17 + ret + +#endif +T0 = 22 +T1 = 23 +T2 = 24 +T3 = 25 +TT = 21 +/* rotate the data word (4 byte) pointed to by X by r20 bits to the right */ +memrotr32: + ld T0, X+ + ld T1, X+ + ld T2, X+ + ld T3, X+ + mov TT, T0 +1: + lsr TT + ror T3 + ror T2 + ror T1 + ror T0 + dec r20 + brne 1b + st -X, T3 + st -X, T2 + st -X, T1 + st -X, T0 + ret + +/* rotate the data word (4 byte) pointed to by X by r20 bits to the left */ +memrotl32: + ld T0, X+ + ld T1, X+ + ld T2, X+ + ld T3, X+ + mov TT, T3 +1: + lsl TT + rol T0 + rol T1 + rol T2 + rol T3 + dec r20 + brne 1b + st -X, T3 + st -X, T2 + st -X, T1 + st -X, T0 + ret + +/* xor the dataword (4 byte) pointed by Z into X */ +memeor32: + ldi T2, 4 +1: + ld T0, X + ld T1, Z+ + eor T0, T1 + st X+, T0 + dec T2 + brne 1b + ret + +.global serpent_lt +serpent_lt: + /* X0 := X0 <<< 13 */ + movw r26, r24 + ldi r20, 7 + rcall memrotl32 + ldi r20, 6 + rcall memrotl32 + /* X2 := X2 <<< 3 */ + adiw r26, 8 + ldi r20, 3 + rcall memrotl32 + /* X1 ^= X2 */ + movw r30, r26 + sbiw r26, 4 + rcall memeor32 + /* X1 ^= X0 */ + sbiw r26, 4 + sbiw r30, 12 + rcall memeor32 + /* X3 ^= X2 */ + movw r30, r26 + adiw r26, 4 + rcall memeor32 + /* T := X0 */ + sbiw r26, 16 + ld r18, X+ + ld r19, X+ + ld r20, X+ + ld r21, X+ + /* T := T<<3 */ + ldi r22, 3 +1: + lsl r18 + rol r19 + rol r20 + rol r21 + dec r22 + brne 1b + clr r31 + /* X3 ^= T */ + adiw r26, 8 + ldi r30, 18 + rcall memeor32 + /* X1 := X1<<<1 */ + sbiw r26, 12 + ldi r20, 1 + rcall memrotl32 + /* X3 := X3<<<7 */ + adiw r26, 8 + ldi r20, 7 + rcall memrotl32 + /* X0 ^= X3 */ + movw r30, r26 + sbiw r26, 12 + rcall memeor32 + /* X0 ^= X1 */ + movw r30, r26 + sbiw r26, 4 + rcall memeor32 + /* X2 ^= X3 */ + adiw r26, 4 + adiw r30, 4 + rcall memeor32 + /* T := X1<<<8 */ + sbiw r26, 8 + ld r19, X+ + ld r20, X+ + ld r21, X+ + ld r18, X+ + /* T := T>>>1; T&=0xfffffff8 */ + lsr r18 + ror r21 + ror r20 + ror r19 + clr r18 + ror r18 + clr r31 + ldi r30, 18 + /* X2 ^= T */ + rcall memeor32 + /* X0 := X0 <<< 5 */ + sbiw r26, 12 + ldi r20, 5 + rcall memrotl32 + /* X3 := X3 >>> 10 */ + adiw r26, 8 + ldi r20, 7 + rcall memrotr32 + ldi r20, 3 + rcall memrotr32 + ret + +.global serpent_inv_lt +serpent_inv_lt: + /* X0 := X0 >>> 5 */ + movw r26, r24 + ldi r20, 5 + rcall memrotr32 + /* X2 := X2 <<< 10 */ + adiw r26, 8 + ldi r20, 7 + rcall memrotl32 + ldi r20, 3 + rcall memrotl32 + /* X2 ^= X3 */ + movw r30, r26 + adiw r30, 4 + rcall memeor32 + sbiw r26, 4 + sbiw r30, 12 + /* T := X1<<7 */ + ld r19, Z+ + ld r20, Z+ + ld r21, Z+ + ld r18, Z+ + lsr r18 + ror r21 + ror r20 + ror r19 + clr r18 + ror r18 + clr r31 + /* X2 ^= T */ + ldi r30, 18 + rcall memeor32 + /* X0 ^= X1 */ + sbiw r26, 12 + movw r30, r26 + adiw r30, 4 + rcall memeor32 + /* X0 ^= X3 */ + sbiw r26, 4 + adiw r30, 4 + rcall memeor32 + /* X1 := X1>>>1 */ + ldi r20, 1 + rcall memrotr32 + /* X3 := X3>>>7 */ + adiw r26, 8 + ldi r20, 7 + rcall memrotr32 + /* X3 ^= X2 */ + sbiw r30, 8 + rcall memeor32 + sbiw r26, 4 + /* T:= X0<<3 */ + sbiw r30, 12 + ld r18, Z+ + ld r19, Z+ + ld r20, Z+ + ld r21, Z+ + ldi r24, 3 +1: + lsl r18 + rol r19 + rol r20 + rol r21 + dec r24 + brne 1b + /* X3 ^= T */ + clr r31 + ldi r30, 18 + rcall memeor32 + /* X1 ^= X0 */ + sbiw r26, 12 + movw r30, r26 + sbiw r30, 4 + rcall memeor32 + /* X1 ^= X2 */ + movw r26, r30 + adiw r30, 4 + rcall memeor32 + /* X2 := X2 >>> 3 */ + ldi r20, 3 + rcall memrotr32 + /* X0 := X0 >>> 13 */ + sbiw r26, 8 + ldi r20, 7 + rcall memrotr32 + ldi r20, 6 + rcall memrotr32 + ret + +/* +#define GOLDEN_RATIO 0x9e3779b9l + +static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){ + uint32_t ret; + ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i; + ret = rotl32(ret, 11); + return ret; +} +*/ +/* + * param b is passed in r24:r25 + * param i is passed in r22 + * return value is returned in r22.r23.r24.r25 + */ +.global serpent_gen_w +serpent_gen_w: + movw r30, r24 + /* ^i^b[0]*/ + ld r21, Z+ + eor r22, r21 + ld r23, Z+ + ld r24, Z+ + ld r25, Z+ + /* ^b[3]^b[5]^[b7] */ + adiw r30, 4 + ldi r20, 3 +1: + adiw r30, 4 + ld r21, Z+ + eor r22, r21 + ld r21, Z+ + eor r23, r21 + ld r21, Z+ + eor r24, r21 + ld r21, Z+ + eor r25, r21 + dec r20 + brne 1b + /* ^0x9e3779b9l */ + ldi r21, 0xb9 + eor r22, r21 + ldi r21, 0x79 + eor r23, r21 + ldi r21, 0x37 + eor r24, r21 + ldi r21, 0x9e + eor r25, r21 + /* <<<11 */ + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + mov r21, r25 + ldi r20, 3 +1: + lsl r21 + rol r22 + rol r23 + rol r24 + rol r25 + dec r20 + brne 1b + ret + diff --git a/serpent-sboxes-fast.S b/serpent-sboxes-fast.S index d3756df..0adcc13 100644 --- a/serpent-sboxes-fast.S +++ b/serpent-sboxes-fast.S @@ -83,7 +83,7 @@ .endm /******************************************************************************* -* END of MACRO SECTION * +* END of MACRO SECTION * *******************************************************************************/ serpent_sbox_fast: diff --git a/serpent-sboxes-small.S b/serpent-sboxes-small.S index cc9da3c..3259240 100644 --- a/serpent-sboxes-small.S +++ b/serpent-sboxes-small.S @@ -83,7 +83,7 @@ .endm /******************************************************************************* -* END of MACRO SECTION * +* END of MACRO SECTION * *******************************************************************************/ serpent_sbox: .byte 0x83, 0x1F, 0x6A, 0xB5, 0xDE, 0x24, 0x07, 0xC9 diff --git a/serpent.c b/serpent.c index c90b099..4960664 100644 --- a/serpent.c +++ b/serpent.c @@ -47,7 +47,9 @@ uint32_t rotr32(uint32_t a, uint8_t n){ #define X2 (((uint32_t*)b)[2]) #define X3 (((uint32_t*)b)[3]) -static void lt(uint8_t *b){ +void serpent_lt(uint8_t *b); +/* +static void serpent_lt(uint8_t *b){ X0 = rotl32(X0, 13); X2 = rotl32(X2, 3); X1 ^= X0 ^ X2; @@ -59,8 +61,12 @@ static void lt(uint8_t *b){ X0 = rotl32(X0, 5); X2 = rotr32(X2, 10); } +*/ -static void inv_lt(uint8_t *b){ +static void serpent_inv_lt(uint8_t *b); + +/* +static void serpent_inv_lt(uint8_t *b){ X2 = rotl32(X2, 10); X0 = rotr32(X0, 5); X2 ^= X3 ^ (X1 << 7); @@ -72,17 +78,19 @@ static void inv_lt(uint8_t *b){ X2 = rotr32(X2, 3); X0 = rotr32(X0, 13); } +*/ +uint32_t serpent_gen_w(uint32_t * b, uint8_t i); +/* #define GOLDEN_RATIO 0x9e3779b9l -static uint32_t gen_w(uint32_t * b, uint8_t i){ +static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){ uint32_t ret; ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i; ret = rotl32(ret, 11); return ret; } - -/* key must be 256bit (32 byte) large! */ +*/ void serpent_init(const void* key, uint16_t keysize, serpent_ctx_t* ctx){ uint32_t buffer[8]; uint8_t i,j; @@ -97,7 +105,7 @@ void serpent_init(const void* key, uint16_t keysize, serpent_ctx_t* ctx){ } for(i=0; i<33; ++i){ for(j=0; j<4; ++j){ - ctx->k[i][j] = gen_w(buffer, i*4+j); + ctx->k[i][j] = serpent_gen_w(buffer, i*4+j); memmove(buffer, &(buffer[1]), 7*4); /* shift buffer one to the "left" */ buffer[7] = ctx->k[i][j]; } @@ -113,7 +121,7 @@ void serpent_enc(void* buffer, const serpent_ctx_t* ctx){ for(i=0; i<31; ++i){ memxor(buffer, ctx->k[i], 16); sbox128(buffer, i); - lt((uint8_t*)buffer); + serpent_lt((uint8_t*)buffer); } memxor(buffer, ctx->k[i], 16); sbox128(buffer, i); @@ -130,7 +138,7 @@ void serpent_dec(void* buffer, const serpent_ctx_t* ctx){ memxor((uint8_t*)buffer, ctx->k[i], 16); --i; for(; i>=0; --i){ - inv_lt(buffer); + serpent_inv_lt(buffer); inv_sbox128(buffer, i); memxor(buffer, ctx->k[i], 16); } diff --git a/shabea.h b/shabea.h index 0adaade..28432a2 100644 --- a/shabea.h +++ b/shabea.h @@ -21,8 +21,7 @@ * \author Daniel Otte * \date 2007-06-07 * \brief SHABEA - a SHA Based Encryption Algorithm declarations - * \par License - * GPL + * \license GPLv3 or later * * SHABEAn-r where n is the blocksize and r the number of round used * @@ -31,5 +30,16 @@ #ifndef SHABEA_H_ #define SHABEA_H_ -void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds); +/** \fn void shabea256(void * block, const void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds); + * \brief shabea256 encryption/decryption + * + * \param block pointer to a 256 bit (32 byte block) to en/decrypt + * \param key pointer to the key material + * \param keysize_b length of the key in bits + * \param enc controls if encryption (1) or decryption (0) is done + * \param rounds rounds to be done by the cipher (it is not recommended to use less then four rounds) + */ +void shabea256(void * block, const void * key, uint16_t keysize_b, + uint8_t enc, uint8_t rounds); + #endif /*SHABEA_H_*/ -- 2.39.5