From 25ccd391d2f02575799e94bf9aac1e108e7c0f3b Mon Sep 17 00:00:00 2001 From: bg Date: Mon, 30 Aug 2010 18:04:40 +0000 Subject: [PATCH 1/1] optimized rotates for keccak --- keccak/keccak.c | 55 +++++++++--- keccak/rotate64.S | 168 ++++++++++++++++++++++++++++++++++++ keccak/rotate64.h | 48 +++++++++++ mkfiles/keccak_c.mk | 2 +- test_src/main-keccak-test.c | 3 +- 5 files changed, 261 insertions(+), 15 deletions(-) create mode 100644 keccak/rotate64.S create mode 100644 keccak/rotate64.h diff --git a/keccak/keccak.c b/keccak/keccak.c index 7983b01..809f219 100644 --- a/keccak/keccak.c +++ b/keccak/keccak.c @@ -22,6 +22,7 @@ #include #include #include "memxor.h" +#include "rotate64.h" #include "keccak.h" #ifdef DEBUG @@ -60,6 +61,7 @@ void keccak_dump_ctx(keccak_ctx_t* ctx){ #endif +/* static uint64_t rc[] PROGMEM = { 0x0000000000000001LL, 0x0000000000008082LL, 0x800000000000808ALL, 0x8000000080008000LL, @@ -74,32 +76,45 @@ static uint64_t rc[] PROGMEM = { 0x8000000080008081LL, 0x8000000000008080LL, 0x0000000080000001LL, 0x8000000080008008LL }; +*/ + +static uint8_t rc_comp[] PROGMEM = { + 0x01, 0x92, 0xda, 0x70, + 0x9b, 0x21, 0xf1, 0x59, + 0x8a, 0x88, 0x39, 0x2a, + 0xbb, 0xcb, 0xd9, 0x53, + 0x52, 0xc0, 0x1a, 0x6a, + 0xf1, 0xd0, 0x21, 0x78, +}; uint64_t rotl64(uint64_t a, uint8_t r){ return (a<>(64-r)); } static uint8_t r[5][5] PROGMEM = { - { 0, 36, 3, 41, 18 }, - { 1, 44, 10, 45, 2 }, - { 62, 6, 43, 15, 61 }, - { 28, 55, 25, 21, 56 }, - { 27, 20, 39, 8, 14 } + { ROT_CODE( 0), ROT_CODE(36), ROT_CODE( 3), ROT_CODE(41), ROT_CODE(18) }, + { ROT_CODE( 1), ROT_CODE(44), ROT_CODE(10), ROT_CODE(45), ROT_CODE( 2) }, + { ROT_CODE(62), ROT_CODE( 6), ROT_CODE(43), ROT_CODE(15), ROT_CODE(61) }, + { ROT_CODE(28), ROT_CODE(55), ROT_CODE(25), ROT_CODE(21), ROT_CODE(56) }, + { ROT_CODE(27), ROT_CODE(20), ROT_CODE(39), ROT_CODE( 8), ROT_CODE(14) } }; +static inline void keccak_round(uint64_t a[5][5], uint8_t rci){ uint64_t b[5][5]; uint8_t i,j; + union { + uint64_t v64; + uint8_t v8[8]; + } t; /* theta */ for(i=0; i<5; ++i){ b[i][0] = a[0][i] ^ a[1][i] ^ a[2][i] ^ a[3][i] ^ a[4][i]; } for(i=0; i<5; ++i){ - b[i][1] = b[(4+i)%5][0] ^ rotl64(b[(i+1)%5][0], 1); - } - for(i=0; i<5; ++i){ + t.v64 = b[(4+i)%5][0] ^ rotate64_1bit_left(b[(i+1)%5][0]); for(j=0; j<5; ++j){ - a[j][i] ^= b[i][1]; + a[j][i] ^= t.v64; } } #if DEBUG @@ -109,7 +124,8 @@ void keccak_round(uint64_t a[5][5], uint8_t rci){ /* rho & pi */ for(i=0; i<5; ++i){ for(j=0; j<5; ++j){ - b[(2*i+3*j)%5][j] = rotl64(a[j][i], pgm_read_byte(&(r[i][j]))); +// b[(2*i+3*j)%5][j] = rotl64(a[j][i], pgm_read_byte(&(r[i][j]))); + b[(2*i+3*j)%5][j] = rotate64left_code(a[j][i], pgm_read_byte(&(r[i][j]))); } } #if DEBUG @@ -127,9 +143,22 @@ void keccak_round(uint64_t a[5][5], uint8_t rci){ keccak_dump_state(a); #endif /* iota */ - uint64_t t; - memcpy_P(&t, &(rc[rci]), 8); - a[0][0] ^= t; + +// memcpy_P(&t, &(rc_comp[rci]), 8); + t.v64 = 0; + t.v8[0] = pgm_read_byte(&(rc_comp[rci])); + if(t.v8[0]&0x40){ + t.v8[7] = 0x80; + } + if(t.v8[0]&0x20){ + t.v8[3] = 0x80; + } + if(t.v8[0]&0x10){ + t.v8[1] = 0x80; + } + t.v8[0] &= 0x8F; + + a[0][0] ^= t.v64; #if DEBUG cli_putstr_P(PSTR("\r\nAfter iota:")); keccak_dump_state(a); diff --git a/keccak/rotate64.S b/keccak/rotate64.S new file mode 100644 index 0000000..5ab3a0f --- /dev/null +++ b/keccak/rotate64.S @@ -0,0 +1,168 @@ +/* rotate64.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +.global rotate64_1bit_left +rotate64_1bit_left: + bst r25, 7 + rol r18 + rol r19 + rol r20 + rol r21 + rol r22 + rol r23 + rol r24 + rol r25 + bld r18, 0 + ret + +.global rotate64_1bit_right +rotate64_1bit_right: + bst r18, 0 + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 + bld r25, 7 + ret + +.global rotate64_nbit_autodir +rotate64_nbit_autodir: + lsr r16 + brcc rotate64_nbit_left +.global rotate64_nbit_right +rotate64_nbit_right: + ldi r30, pm_lo8(rotate64_1bit_right) + ldi r31, pm_hi8(rotate64_1bit_right) + rjmp icall_r16_times +.global rotate64_nbit_left +rotate64_nbit_left: + ldi r30, pm_lo8(rotate64_1bit_left) + ldi r31, pm_hi8(rotate64_1bit_left) +icall_r16_times: +1: dec r16 + brmi 2f + icall + rjmp 1b +2: + pop r16 + ret + +rotate64_1byte_left: + mov r0, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + mov r21, r20 + mov r20, r19 + mov r19, r18 + mov r18, r0 + ret + +rotate64_2byte_left: + movw r0, r24 + movw r24, r22 + movw r22, r20 + movw r20, r18 + movw r18, r0 + ret + +rotate64_3byte_left: + mov r0, r25 + mov r25, r22 + mov r22, r19 + mov r19, r24 + mov r24, r21 + mov r21, r18 + mov r18, r23 + mov r23, r20 + mov r20, r0 + ret + +rotate64_4byte_left: + movw r0, r24 + movw r24, r20 + movw r20, r0 + movw r0, r22 + movw r22, r18 + movw r18, r0 + ret + +rotate64_5byte_left: + mov r0, r25 + mov r25, r20 + mov r20, r23 + mov r23, r18 + mov r18, r21 + mov r21, r24 + mov r24, r19 + mov r19, r22 + mov r22, r0 + ret + +rotate64_6byte_left: + movw r0, r18 + movw r18, r20 + movw r20, r22 + movw r22, r24 + movw r24, r0 + ret + +rotate64_7byte_left: + mov r0, r18 + mov r18, r19 + mov r19, r20 + mov r20, r21 + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r0 + ret + + +byte_rot_jmp_table: + ret + rjmp rotate64_1byte_left + rjmp rotate64_2byte_left + rjmp rotate64_3byte_left + rjmp rotate64_4byte_left + rjmp rotate64_5byte_left + rjmp rotate64_6byte_left + rjmp rotate64_7byte_left + +.global rotate64left_code +rotate64left_code: + ldi r30, pm_lo8(byte_rot_jmp_table) + ldi r31, pm_hi8(byte_rot_jmp_table) + push r16 + mov r0, r16 + andi r16, 0x70 + swap r16 + add r30, r16 + adc r31, r1 + mov r16, r0 + andi r16, 0x0f + icall + clr r1 + rjmp rotate64_nbit_autodir diff --git a/keccak/rotate64.h b/keccak/rotate64.h new file mode 100644 index 0000000..4d60c4c --- /dev/null +++ b/keccak/rotate64.h @@ -0,0 +1,48 @@ +/* rotate64.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef ROTATE64_H_ +#define ROTATE64_H_ + +#include + +#define ROTL_0 0 +#define ROTL_1 1 +#define ROTL_2 2 +#define ROTL_3 3 +#define ROTL_4 4 +#define ROTR_0 0 +#define ROTR_1 (8+1) +#define ROTR_2 (8+2) +#define ROTR_3 (8+3) + + +#define ROT_BIT(a) (((a)<=4)?((a)<<1):(0x01|((8-(a))<<1))) + +//#define ROT_CODE(a,b) (((a)<<4) | ((b)&0x0f)) + +#define ROT_CODE(a) ((((a)/8+((((a)%8)>4)?1:0))<<4) | ROT_BIT(((a)%8))) +//#define ROT_CODE(a) ((((a)/8)<<4) | ((a)%8)) /* rotations only to the left */ + +uint64_t rotate64_1bit_left(uint64_t a); +uint64_t rotate64_1bit_right(uint64_t a); +uint64_t rotate64_nbit_autodir(uint64_t a, int8_t n); +uint64_t rotate64left_code(uint64_t a, int8_t code); + +#endif /* ROTATE64_H_ */ diff --git a/mkfiles/keccak_c.mk b/mkfiles/keccak_c.mk index 2041438..9a9c172 100644 --- a/mkfiles/keccak_c.mk +++ b/mkfiles/keccak_c.mk @@ -5,7 +5,7 @@ ALGO_NAME := KECCAK_C HASHES += $(ALGO_NAME) $(ALGO_NAME)_DIR := keccak/ -$(ALGO_NAME)_OBJ := keccak.o memxor.o +$(ALGO_NAME)_OBJ := keccak.o memxor.o rotate64.o $(ALGO_NAME)_TEST_BIN := main-keccak-test.o hfal_keccak.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := test nessie $(ALGO_NAME)_PERFORMANCE_TEST := performance diff --git a/test_src/main-keccak-test.c b/test_src/main-keccak-test.c index 42aeb4a..80ed373 100644 --- a/test_src/main-keccak-test.c +++ b/test_src/main-keccak-test.c @@ -60,7 +60,8 @@ void test_256(void){ memset(null, 0, KECCAK256_BLOCKSIZE_B); keccak_ctx_t ctx; keccak256_init(&ctx); - keccak_lastBlock(&ctx, data, 29); + keccak_nextBlock(&ctx, null); + // keccak_lastBlock(&ctx, data, 29); keccak256_ctx2hash(hash, &ctx); cli_putstr_P(PSTR("\r\n testhash: ")); cli_hexdump(hash, 32); -- 2.39.5