From 2deab9197b8d9d74c01156777b250746c36fba07 Mon Sep 17 00:00:00 2001 From: bg Date: Wed, 13 May 2009 21:48:37 +0000 Subject: [PATCH] performance upgrade --- blake_large.c | 40 ++++--- blake_small.c | 52 +++++---- hfal-performance.c | 16 ++- hfal_blake_large.c | 8 +- hfal_blake_small.c | 8 +- mkfiles/shabal.mk | 3 +- mkfiles/shabal_c.mk | 3 +- test_src/main-shabal-test.c | 210 ++++-------------------------------- test_src/performance_test.h | 6 ++ 9 files changed, 98 insertions(+), 248 deletions(-) diff --git a/blake_large.c b/blake_large.c index e4a35b6..43b0fa7 100644 --- a/blake_large.c +++ b/blake_large.c @@ -32,8 +32,8 @@ #include "blake_large.h" #include "blake_common.h" -#define BUG_3 0 /* bug compatibility with reference code */ -#define BUG_4 0 /* bug compatibility with reference code */ +#define BUG_3 1 /* bug compatibility with reference code */ +#define BUG_4 1 /* bug compatibility with reference code */ uint64_t pgm_read_qword(void* p){ union{ @@ -67,25 +67,6 @@ uint64_t blake_c[] PROGMEM = { ((0x00ff0000&(a))>>8)| \ (a)>>24 ) -void blake_large_g(uint8_t r, uint8_t i, uint64_t* v, const uint64_t* m){ - uint8_t a,b,c,d, s0, s1; - a = pgm_read_byte(blake_index_lut+4*i+0); - b = pgm_read_byte(blake_index_lut+4*i+1); - c = pgm_read_byte(blake_index_lut+4*i+2); - d = pgm_read_byte(blake_index_lut+4*i+3); - s0 = pgm_read_byte(blake_sigma+16*r+2*i+0); - s1 = pgm_read_byte(blake_sigma+16*r+2*i+1); - v[a] += v[b] + (m[s0] ^ pgm_read_qword(&(blake_c[s1]))); - v[d] = ROTR64(v[d]^v[a], 32); - v[c] += v[d]; - v[b] = ROTR64(v[b]^v[c], 25); - v[a] += v[b] + (m[s1] ^ pgm_read_qword(&(blake_c[s0]))); - v[d] = ROTR64(v[d]^v[a], 16); - v[c] += v[d]; - v[b] = ROTR64(v[b]^v[c], 11); - -} - void blake_large_expand(uint64_t* v, const blake_large_ctx_t* ctx){ uint8_t i; memcpy(v, ctx->h, 8*8); @@ -108,9 +89,24 @@ void blake_large_changeendian(void* dest, const void* src){ void blake_large_compress(uint64_t* v,const void* m){ uint8_t r,i; + uint8_t a,b,c,d, s0, s1; for(r=0; 
r<14; ++r){ for(i=0; i<8; ++i){ - blake_large_g(r%10, i, v, (uint64_t*)m); + // blake_large_g(r%10, i, v, (uint64_t*)m); + a = pgm_read_byte(blake_index_lut+4*i+0); + b = pgm_read_byte(blake_index_lut+4*i+1); + c = pgm_read_byte(blake_index_lut+4*i+2); + d = pgm_read_byte(blake_index_lut+4*i+3); + s0 = pgm_read_byte(blake_sigma+16*(r%10)+2*i+0); /* r runs to 13: wrap the sigma row like the replaced blake_large_g(r%10, ...) call did */ + s1 = pgm_read_byte(blake_sigma+16*(r%10)+2*i+1); + v[a] += v[b] + (((uint64_t*)m)[s0] ^ pgm_read_qword(&(blake_c[s1]))); + v[d] = ROTR64(v[d]^v[a], 32); + v[c] += v[d]; + v[b] = ROTR64(v[b]^v[c], 25); + v[a] += v[b] + (((uint64_t*)m)[s1] ^ pgm_read_qword(&(blake_c[s0]))); + v[d] = ROTR64(v[d]^v[a], 16); + v[c] += v[d]; + v[b] = ROTR64(v[b]^v[c], 11); } } } diff --git a/blake_small.c b/blake_small.c index ea89e65..75a2837 100644 --- a/blake_small.c +++ b/blake_small.c @@ -32,8 +32,8 @@ #include "blake_small.h" #include "blake_common.h" -#define BUG_1 0 /* bug compatibility for zero length message */ -#define BUG_2 0 /* bug compatibility for messages of length%512=505...511 */ +#define BUG_1 1 /* bug compatibility for zero length message */ +#define BUG_2 1 /* bug compatibility for messages of length%512=505...511 */ uint32_t blake_c[] PROGMEM = { @@ -54,25 +54,6 @@ uint32_t blake_c[] PROGMEM = { ((0x00ff0000&(a))>>8)| \ (a)>>24 ) -void blake_small_g(uint8_t r, uint8_t i, uint32_t* v, const uint32_t* m){ - uint8_t a,b,c,d, s0, s1; - a = pgm_read_byte(blake_index_lut+4*i+0); - b = pgm_read_byte(blake_index_lut+4*i+1); - c = pgm_read_byte(blake_index_lut+4*i+2); - d = pgm_read_byte(blake_index_lut+4*i+3); - s0 = pgm_read_byte(blake_sigma+16*r+2*i+0); - s1 = pgm_read_byte(blake_sigma+16*r+2*i+1); - v[a] += v[b] + (m[s0] ^ pgm_read_dword(&(blake_c[s1]))); - v[d] = ROTR32(v[d]^v[a], 16); - v[c] += v[d]; - v[b] = ROTR32(v[b]^v[c], 12); - v[a] += v[b] + (m[s1] ^ pgm_read_dword(&(blake_c[s0]))); - v[d] = ROTR32(v[d]^v[a], 8); - v[c] += v[d]; - v[b] = ROTR32(v[b]^v[c], 7); - -} - void blake_small_expand(uint32_t* v, const blake_small_ctx_t* 
ctx){ uint8_t i; memcpy(v, ctx->h, 8*4); @@ -92,9 +73,36 @@ void blake_small_changeendian(void* dest, const void* src){ void blake_small_compress(uint32_t* v,const void* m){ uint8_t r,i; + uint8_t a,b,c,d, s0, s1; + uint32_t lv[4]; for(r=0; r<10; ++r){ for(i=0; i<8; ++i){ - blake_small_g(r, i, v, (uint32_t*)m); + // blake_small_g(r, i, v, (uint32_t*)m); + a = pgm_read_byte(blake_index_lut+4*i+0); + b = pgm_read_byte(blake_index_lut+4*i+1); + c = pgm_read_byte(blake_index_lut+4*i+2); + d = pgm_read_byte(blake_index_lut+4*i+3); + s0 = pgm_read_byte(blake_sigma+16*r+2*i+0); + s1 = pgm_read_byte(blake_sigma+16*r+2*i+1); + lv[0] = v[a]; + lv[1] = v[b]; + lv[2] = v[c]; + lv[3] = v[d]; + + lv[0] += lv[1] + (((uint32_t*)m)[s0] ^ pgm_read_dword(&(blake_c[s1]))); + lv[3] = ROTR32(lv[3]^lv[0], 16); + lv[2] += lv[3]; + lv[1] = ROTR32(lv[1]^lv[2], 12); + lv[0] += lv[1] + (((uint32_t*)m)[s1] ^ pgm_read_dword(&(blake_c[s0]))); + lv[3] = ROTR32(lv[3]^lv[0], 8); + lv[2] += lv[3]; + lv[1] = ROTR32(lv[1]^lv[2], 7); + + v[a] = lv[0]; + v[b] = lv[1]; + v[c] = lv[2]; + v[d] = lv[3]; + } } } diff --git a/hfal-performance.c b/hfal-performance.c index 391fac8..5108f25 100644 --- a/hfal-performance.c +++ b/hfal-performance.c @@ -69,26 +69,34 @@ void hfal_performance(const hfdesc_t* hd){ cli_putstr_P(PSTR("\r\n blocksize (bits): ")); printvalue(hf.blocksize_b); - startTimer(1); + startTimer(0); + START_TIMER; hf.init(&ctx); + STOP_TIMER; t = stopTimer(); cli_putstr_P(PSTR("\r\n init (cycles): ")); printvalue(t); - startTimer(1); + startTimer(0); + START_TIMER; hf.nextBlock(&ctx, data); + STOP_TIMER; t = stopTimer(); cli_putstr_P(PSTR("\r\n nextBlock (cycles): ")); printvalue(t); - startTimer(1); + startTimer(0); + START_TIMER; hf.lastBlock(&ctx, data, 0); + STOP_TIMER; t = stopTimer(); cli_putstr_P(PSTR("\r\n lastBlock (cycles): ")); printvalue(t); - startTimer(1); + startTimer(0); + START_TIMER; hf.ctx2hash(digest, &ctx); + STOP_TIMER; t = stopTimer(); cli_putstr_P(PSTR("\r\n ctx2hash 
(cycles): ")); printvalue(t); diff --git a/hfal_blake_large.c b/hfal_blake_large.c index a6a2ac9..f9f8180 100644 --- a/hfal_blake_large.c +++ b/hfal_blake_large.c @@ -42,8 +42,8 @@ const hfdesc_t blake48_desc PROGMEM = { BLAKE48_BLOCKSIZE, 384, (hf_init_fpt)blake48_init, - (hf_nextBlock_fpt)blake48_nextBlock, - (hf_lastBlock_fpt)blake48_lastBlock, + (hf_nextBlock_fpt)blake_large_nextBlock, + (hf_lastBlock_fpt)blake_large_lastBlock, (hf_ctx2hash_fpt)blake48_ctx2hash, (hf_free_fpt)NULL, (hf_mem_fpt)blake48 @@ -57,8 +57,8 @@ const hfdesc_t blake64_desc PROGMEM = { BLAKE64_BLOCKSIZE, 512, (hf_init_fpt)blake64_init, - (hf_nextBlock_fpt)blake64_nextBlock, - (hf_lastBlock_fpt)blake64_lastBlock, + (hf_nextBlock_fpt)blake_large_nextBlock, + (hf_lastBlock_fpt)blake_large_lastBlock, (hf_ctx2hash_fpt)blake64_ctx2hash, (hf_free_fpt)NULL, (hf_mem_fpt)blake64 diff --git a/hfal_blake_small.c b/hfal_blake_small.c index 4d6600c..f63cea5 100644 --- a/hfal_blake_small.c +++ b/hfal_blake_small.c @@ -42,8 +42,8 @@ const hfdesc_t blake28_desc PROGMEM = { BLAKE28_BLOCKSIZE, 224, (hf_init_fpt)blake28_init, - (hf_nextBlock_fpt)blake28_nextBlock, - (hf_lastBlock_fpt)blake28_lastBlock, + (hf_nextBlock_fpt)blake_small_nextBlock, + (hf_lastBlock_fpt)blake_small_lastBlock, (hf_ctx2hash_fpt)blake28_ctx2hash, (hf_free_fpt)NULL, (hf_mem_fpt)blake28 @@ -57,8 +57,8 @@ const hfdesc_t blake32_desc PROGMEM = { BLAKE32_BLOCKSIZE, 256, (hf_init_fpt)blake32_init, - (hf_nextBlock_fpt)blake32_nextBlock, - (hf_lastBlock_fpt)blake32_lastBlock, + (hf_nextBlock_fpt)blake_small_nextBlock, + (hf_lastBlock_fpt)blake_small_lastBlock, (hf_ctx2hash_fpt)blake32_ctx2hash, (hf_free_fpt)NULL, (hf_mem_fpt)blake32 diff --git a/mkfiles/shabal.mk b/mkfiles/shabal.mk index bc12041..20ab49e 100644 --- a/mkfiles/shabal.mk +++ b/mkfiles/shabal.mk @@ -9,7 +9,8 @@ $(ALGO_NAME)_OBJ := shabal-asm.o shabal192-asm.o shabal224-asm.o \ shabal256-asm.o shabal384-asm.o shabal512-asm.o $(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o 
uart.o hexdigit_tab.o \ dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \ - nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o + nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o \ + hfal-nessie.o hfal-performance.o hfal-test.o $(ALGO_NAME)_NESSIE_TEST := test nessie $(ALGO_NAME)_PERFORMANCE_TEST := performance diff --git a/mkfiles/shabal_c.mk b/mkfiles/shabal_c.mk index 9521743..7d1d66b 100644 --- a/mkfiles/shabal_c.mk +++ b/mkfiles/shabal_c.mk @@ -8,7 +8,8 @@ HASHES += $(ALGO_NAME) $(ALGO_NAME)_OBJ := shabal.o shabal192.o shabal224.o shabal256.o shabal384.o shabal512.o $(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o uart.o hexdigit_tab.o \ dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \ - nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o + nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o \ + hfal-nessie.o hfal-performance.o hfal-test.o $(ALGO_NAME)_NESSIE_TEST := test nessie $(ALGO_NAME)_PERFORMANCE_TEST := performance diff --git a/test_src/main-shabal-test.c b/test_src/main-shabal-test.c index c103529..93a1900 100644 --- a/test_src/main-shabal-test.c +++ b/test_src/main-shabal-test.c @@ -29,6 +29,9 @@ #include "shabal.h" #include "cli.h" #include "hfal_shabal.h" +#include "hfal-test.h" +#include "hfal-nessie.h" +#include "hfal-performance.h" #include "shavs.h" #include "nessie_hash_test.h" #include "performance_test.h" @@ -39,67 +42,37 @@ char* algo_name = "Shabal"; + +const hfdesc_t* algolist[] PROGMEM = { + (hfdesc_t*)&shabal192_desc, + (hfdesc_t*)&shabal224_desc, + (hfdesc_t*)&shabal256_desc, + (hfdesc_t*)&shabal384_desc, + (hfdesc_t*)&shabal512_desc, + NULL +}; + /***************************************************************************** * additional validation-functions * *****************************************************************************/ void testrun_stdtest_shabal192(void* msg, uint16_t size_b){ - uint8_t hash[192/8]; - - cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal 
(192 bits):")); - - cli_putstr_P(PSTR("\r\nmessage:")); - cli_hexdump_block(msg, (size_b+7)/8, 4, 16); - shabal192(hash, msg, size_b); - cli_putstr_P(PSTR("\r\nhash:")); - cli_hexdump_block(hash, 192/8, 4, 16); + hfal_test(&shabal192_desc, msg, size_b); } void testrun_stdtest_shabal224(void* msg, uint16_t size_b){ - uint8_t hash[224/8]; - - cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (224 bits):")); - - cli_putstr_P(PSTR("\r\nmessage:")); - cli_hexdump_block(msg, (size_b+7)/8, 4, 16); - shabal224(hash, msg, size_b); - cli_putstr_P(PSTR("\r\nhash:")); - cli_hexdump_block(hash, 224/8, 4, 16); + hfal_test(&shabal224_desc, msg, size_b); } void testrun_stdtest_shabal256(void* msg, uint16_t size_b){ - uint8_t hash[256/8]; - - cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (256 bits):")); - - cli_putstr_P(PSTR("\r\nmessage:")); - cli_hexdump_block(msg, (size_b+7)/8, 4, 16); - shabal256(hash, msg, size_b); - cli_putstr_P(PSTR("\r\nhash:")); - cli_hexdump_block(hash, 256/8, 4, 16); + hfal_test(&shabal256_desc, msg, size_b); } void testrun_stdtest_shabal384(void* msg, uint16_t size_b){ - uint8_t hash[384/8]; - - cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (384 bits):")); - - cli_putstr_P(PSTR("\r\nmessage:")); - cli_hexdump_block(msg, (size_b+7)/8, 4, 16); - shabal384(hash, msg, size_b); - cli_putstr_P(PSTR("\r\nhash:")); - cli_hexdump_block(hash, 384/8, 4, 16); + hfal_test(&shabal384_desc, msg, size_b); } void testrun_stdtest_shabal512(void* msg, uint16_t size_b){ - uint8_t hash[512/8]; - - cli_putstr_P(PSTR("\r\n\r\nTest vectors for Shabal (512 bits):")); - - cli_putstr_P(PSTR("\r\nmessage:")); - cli_hexdump_block(msg, (size_b+7)/8, 4, 16); - shabal512(hash, msg, size_b); - cli_putstr_P(PSTR("\r\nhash:")); - cli_hexdump_block(hash, 512/8, 4, 16); + hfal_test(&shabal512_desc, msg, size_b); } void testrun_stdtest_shabal(void){ @@ -204,160 +177,17 @@ void testinit(void){ } void performance_shabal(void){ - uint64_t t; - char str[16]; - uint8_t data[64]; - 
uint8_t hash[512/8]; - shabal_ctx_t ctx; - - calibrateTimer(); - print_overhead(); - - memset(data, 0, 64); - - startTimer(1); - shabal192_init(&ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx-gen time (192): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal224_init(&ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx-gen time (224): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal256_init(&ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx-gen time (256): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal384_init(&ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx-gen time (384): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal512_init(&ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx-gen time (512): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal_nextBlock(&ctx, data); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tone-block time: ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - - startTimer(1); - shabal_lastBlock(&ctx, data, 0); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tlast block time: ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal192_ctx2hash(hash, &ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx2hash time (192): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal224_ctx2hash(hash, &ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx2hash time (224): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal256_ctx2hash(hash, &ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx2hash time (256): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - startTimer(1); - shabal384_ctx2hash(hash, &ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx2hash time (384): ")); - ultoa((unsigned long)t, 
str, 10); - cli_putstr(str); - - startTimer(1); - shabal512_ctx2hash(hash, &ctx); - t = stopTimer(); - cli_putstr_P(PSTR("\r\n\tctx2hash time (512): ")); - ultoa((unsigned long)t, str, 10); - cli_putstr(str); - - cli_putstr_P(PSTR("\r\n")); - + hfal_performance_multiple(algolist); } void testrun_nessie_shabal(void){ - nessie_hash_ctx.hashsize_b = 192; - nessie_hash_ctx.blocksize_B = 512/8; - nessie_hash_ctx.ctx_size_B = sizeof(shabal_ctx_t); - nessie_hash_ctx.name = "Shabal-192"; - nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal192_init; - nessie_hash_ctx.hash_next = (nessie_hash_next_fpt)shabal_nextBlock; - nessie_hash_ctx.hash_last = (nessie_hash_last_fpt)shabal_lastBlock; - nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal192_ctx2hash; - - nessie_hash_run(); - - nessie_hash_ctx.hashsize_b = 224; - nessie_hash_ctx.name = "Shabal-224"; - nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal224_init; - nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal224_ctx2hash; - - nessie_hash_run(); - - nessie_hash_ctx.hashsize_b = 256; - nessie_hash_ctx.name = "Shabal-256"; - nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal256_init; - nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal256_ctx2hash; - - nessie_hash_run(); - - nessie_hash_ctx.hashsize_b = 384; - nessie_hash_ctx.name = "Shabal-384"; - nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal384_init; - nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal384_ctx2hash; - - nessie_hash_run(); - - nessie_hash_ctx.hashsize_b = 512; - nessie_hash_ctx.name = "Shabal-512"; - nessie_hash_ctx.hash_init = (nessie_hash_init_fpt)shabal512_init; - nessie_hash_ctx.hash_conv = (nessie_hash_conv_fpt)shabal512_ctx2hash; - - nessie_hash_run(); - + hfal_nessie_multiple(algolist); } /***************************************************************************** * main * *****************************************************************************/ -const hfdesc_t* algolist[] PROGMEM = { - 
(hfdesc_t*)&shabal192_desc, - (hfdesc_t*)&shabal224_desc, - (hfdesc_t*)&shabal256_desc, - (hfdesc_t*)&shabal384_desc, - (hfdesc_t*)&shabal512_desc, - NULL -}; const char nessie_str[] PROGMEM = "nessie"; const char test_str[] PROGMEM = "test"; diff --git a/test_src/performance_test.h b/test_src/performance_test.h index 5283b02..bce714b 100644 --- a/test_src/performance_test.h +++ b/test_src/performance_test.h @@ -21,6 +21,12 @@ #include #include +#include +#include + + +#define START_TIMER TCCR1B=1 +#define STOP_TIMER TCCR1B=0 void calibrateTimer(void); void startTimer(uint8_t granularity); -- 2.39.2