From 326fddd4d1ff3b8a78e5ab5b41473879ed43e453 Mon Sep 17 00:00:00 2001 From: bg Date: Wed, 1 Sep 2010 15:31:19 +0000 Subject: [PATCH] optimizations on cubehash --- cubehash/cubehash.c | 67 ++++++++++++++++--------------------- cubehash/cubehash_rotates.S | 1 + cubehash/xchg.S | 44 ++++++++++++++++++++++++ cubehash/xchg.h | 27 +++++++++++++++ mkfiles/cubehash_c.mk | 2 +- 5 files changed, 101 insertions(+), 40 deletions(-) create mode 100644 cubehash/xchg.S create mode 100644 cubehash/xchg.h diff --git a/cubehash/cubehash.c b/cubehash/cubehash.c index 8b4a931..0cc62b0 100644 --- a/cubehash/cubehash.c +++ b/cubehash/cubehash.c @@ -29,16 +29,17 @@ #include "memxor.h" #include "cubehash.h" #include "cubehash_rotates.h" +#include "xchg.h" #include #include /* -• Add x_0jklm into x_1jklm modulo 232 , for each (j, k, l, m). +• Add x_0jklm into x_1jklm modulo 2**32 , for each (j, k, l, m). • Rotate x_0jklm upwards by 7 bits, for each (j, k, l, m). • Swap x_00klm with x_01klm , for each (k, l, m). • Xor x_1jklm into x_0jklm , for each (j, k, l, m). • Swap x_1jk0m with x_1jk1m , for each (j, k, m). -• Add x_0jklm into x_1jklm modulo 232 , for each (j, k, l, m). +• Add x_0jklm into x_1jklm modulo 2**32 , for each (j, k, l, m). • Rotate x_0jklm upwards by 11 bits, for each (j, k, l, m). • Swap x_0j0lm with x_0j1lm , for each (j, l, m). • Xor x_1jklm into x_0jklm , for each (j, k, l, m). 
@@ -47,46 +48,34 @@ static void cubehash_round(cubehash_ctx_t* ctx){ uint8_t i; - uint32_t t; + uint32_t t, t2; for(i=0; i<16; ++i){ - ctx->a[i+16] += ctx->a[i]; - ctx->a[i] = rotate7left(ctx->a[i]); + ctx->a[i+16] += t = ctx->a[i]; + ctx->a[i] = rotate7left(t); } - for(i=0; i<8; ++i){ - t = ctx->a[i]; - ctx->a[i] = ctx->a[i+8]; - ctx->a[i+8] = t; + xchg32_array(&(ctx->a[0]), &(ctx->a[8]), 8); + for(i=0; i<16; i+=4){ + t = ctx->a[i+16]; + t2 = ctx->a[i] ^= t; + ctx->a[i+16] = ctx->a[i+18] + t2; + ctx->a[i] = rotate11left(t2); + t2 = ctx->a[i+2] ^= ctx->a[i+18]; + ctx->a[i+18] = t + t2; + ctx->a[i+2] = rotate11left(t2); + t = ctx->a[i+17]; + t2 = ctx->a[i+1] ^= t; + ctx->a[i+17] = ctx->a[i+19] + t2; + ctx->a[i+1] = rotate11left(t2); + t2 = ctx->a[i+3] ^= ctx->a[i+19]; + ctx->a[i+19] = t + t2; + ctx->a[i+3] = rotate11left(t2); } - for(i=16; i<4*4+16; i+=4){ - t = ctx->a[i]; - ctx->a[i-16] ^= t; - ctx->a[i] = ctx->a[i+2] + ctx->a[i-16]; - ctx->a[i-16] = rotate11left(ctx->a[i-16]); - ctx->a[i-14] ^= ctx->a[i+2]; - ctx->a[i+2] = t + ctx->a[i-14]; - ctx->a[i-14] = rotate11left(ctx->a[i-14]); - t = ctx->a[i+1]; - ctx->a[i-15] ^= t; - ctx->a[i+1] = ctx->a[i+3] + ctx->a[i-15]; - ctx->a[i-15] = rotate11left(ctx->a[i-15]); - ctx->a[i-13] ^= ctx->a[i+3]; - ctx->a[i+3] = t + ctx->a[i-13]; - ctx->a[i-13] = rotate11left(ctx->a[i-13]); - } - for(i=0; i<4; ++i){ - t = ctx->a[i]; - ctx->a[i] = ctx->a[i+4]; - ctx->a[i+4] = t; - } - for(i=8; i<4+8; ++i){ - t = ctx->a[i]; - ctx->a[i] = ctx->a[i+4]; - ctx->a[i+4] = t; - } - for(i=16; i<16+16; i+=2){ - ctx->a[i-16] ^= t = ctx->a[i]; - ctx->a[i-15] ^= ctx->a[i] = ctx->a[i+1]; - ctx->a[i+1] = t; + xchg32_array(&(ctx->a[0]), &(ctx->a[4]), 4); + xchg32_array(&(ctx->a[8]), &(ctx->a[12]), 4); + for(i=0; i<16; i+=2){ + ctx->a[i] ^= t = ctx->a[i+16]; + ctx->a[i+1] ^= ctx->a[i+16] = ctx->a[i+17]; + ctx->a[i+17] = t; } } diff --git a/cubehash/cubehash_rotates.S b/cubehash/cubehash_rotates.S index 2e4ca16..01c228d 100644 --- 
a/cubehash/cubehash_rotates.S +++ b/cubehash/cubehash_rotates.S @@ -55,3 +55,4 @@ rotate11left: rol r22 ret + diff --git a/cubehash/xchg.S b/cubehash/xchg.S new file mode 100644 index 0000000..7e13447 --- /dev/null +++ b/cubehash/xchg.S @@ -0,0 +1,44 @@ +/* xchg.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +.global xchg32_array +xchg32_array: + movw r26, r24 + movw r30, r22 +1: + ld r24, X + ld r25, Z + st X+, r25 + st Z+, r24 + ld r24, X + ld r25, Z + st X+, r25 + st Z+, r24 + ld r24, X + ld r25, Z + st X+, r25 + st Z+, r24 + ld r24, X + ld r25, Z + st X+, r25 + st Z+, r24 + dec r20 + brne 1b + ret + diff --git a/cubehash/xchg.h b/cubehash/xchg.h new file mode 100644 index 0000000..c8c6565 --- /dev/null +++ b/cubehash/xchg.h @@ -0,0 +1,27 @@ +/* xchg.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef XCHG_H_ +#define XCHG_H_ + +#include <stdint.h> + +void xchg32_array(void* a, void* b, uint8_t n); + +#endif /* XCHG_H_ */ diff --git a/mkfiles/cubehash_c.mk b/mkfiles/cubehash_c.mk index 73c227e..051ae16 100644 --- a/mkfiles/cubehash_c.mk +++ b/mkfiles/cubehash_c.mk @@ -5,7 +5,7 @@ ALGO_NAME := CUBEHASH_C HASHES += $(ALGO_NAME) $(ALGO_NAME)_DIR := cubehash/ -$(ALGO_NAME)_OBJ := cubehash.o cubehash_rotates.o memxor.o +$(ALGO_NAME)_OBJ := cubehash.o cubehash_rotates.o memxor.o xchg.o $(ALGO_NAME)_TEST_BIN := main-cubehash-test.o hfal_cubehash.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := test nessie $(ALGO_NAME)_PERFORMANCE_TEST := performance -- 2.39.5