From 8d9f80e5ae0d940107681aff616f4466be68c77f Mon Sep 17 00:00:00 2001 From: bg Date: Tue, 30 Dec 2014 14:07:01 +0100 Subject: [PATCH] small optimizations to sha2 / sha256 --- .bzrignore | 1 + sha2/sha2_large_common.c | 32 ++++----- sha2/sha2_small_common.c | 83 ++++++++++++---------- sha256/sha256.c | 146 +++++++++++++++++++++------------------ sha256/sha256.h | 6 +- 5 files changed, 145 insertions(+), 123 deletions(-) create mode 100644 .bzrignore diff --git a/.bzrignore b/.bzrignore new file mode 100644 index 0000000..90ec22b --- /dev/null +++ b/.bzrignore @@ -0,0 +1 @@ +.svn diff --git a/sha2/sha2_large_common.c b/sha2/sha2_large_common.c index 9225440..b74c84e 100644 --- a/sha2/sha2_large_common.c +++ b/sha2/sha2_large_common.c @@ -53,8 +53,8 @@ uint64_t change_endian64(uint64_t x){ uint8_t i=8; do{ r <<= 8; - r |= 0xff&x; - x >>=8; + r |= (uint8_t)x; + x >>= 8; }while(--i); return r; } @@ -63,12 +63,12 @@ uint64_t change_endian64(uint64_t x){ static const uint64_t rotr64(uint64_t x, uint8_t n){ - return (x>>n)|(x<<(64-n)); + return (x >> n) | (x << (64 - n)); } static const uint64_t rotl64(uint64_t x, uint8_t n){ - return (x<<n)|(x>>(64-n)); + return (x << n) | (x >> (64 - n)); } static const @@ -86,8 +86,8 @@ uint64_t pgm_read_uint64_t_P(const uint64_t * p){ #define MAJ(x,y,z) (((x)&(y))^((x)&(z))^((y)&(z))) #define SIGMA_0(x) (rotr64((x), 28) ^ rotl64((x), 30) ^ rotl64((x), 25)) #define SIGMA_1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotl64((x), 23)) -#define SIGMA_a(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x)>>7)) -#define SIGMA_b(x) (rotr64((x), 19) ^ rotl64((x), 3) ^ ((x)>>6)) +#define SIGMA_a(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7)) +#define SIGMA_b(x) (rotr64((x), 19) ^ rotl64((x), 3) ^ ((x) >> 6)) void sha2_large_common_nextBlock(sha2_large_common_ctx_t *ctx, const void *block){ uint64_t w[16], wx; @@ -95,23 +95,23 @@ void sha2_large_common_nextBlock(sha2_large_common_ctx_t *ctx, const void *block uint64_t t1, t2; const uint64_t 
*k=sha2_large_common_const; uint8_t i; - i=16; - do{ - w[16-i] = change_endian64(*((const uint64_t*)block)); + i = 16; + do { + w[16 - i] = change_endian64(*((const uint64_t*)block)); block = (uint8_t*)block + 8; - }while(--i); - memcpy(a, ctx->h, 8*8); - for(i=0; i<80; ++i){ - if(i<16){ - wx=w[i]; + } while(--i); + memcpy(a, ctx->h, 8 * 8); + for(i = 0; i < 80; ++i){ + if(i < 16){ + wx = w[i]; }else{ wx = SIGMA_b(w[14]) + w[9] + SIGMA_a(w[1]) + w[0]; - memmove(&(w[0]), &(w[1]), 15*8); + memmove(&(w[0]), &(w[1]), 15 * 8); w[15] = wx; } t1 = a[7] + SIGMA_1(a[4]) + CH(a[4], a[5], a[6]) + pgm_read_uint64_t_P(k++) + wx; t2 = SIGMA_0(a[0]) + MAJ(a[0], a[1], a[2]); - memmove(&(a[1]), &(a[0]), 7*8); + memmove(&(a[1]), &(a[0]), 7 * 8); a[0] = t1 + t2; a[4] += t1; } diff --git a/sha2/sha2_small_common.c b/sha2/sha2_small_common.c index 4072c6b..133d762 100644 --- a/sha2/sha2_small_common.c +++ b/sha2/sha2_small_common.c @@ -22,6 +22,8 @@ #include #include "sha2_small_common.h" +#include +#include #define LITTLE_ENDIAN @@ -29,34 +31,36 @@ * rotate x right by n positions */ static -uint32_t rotr32( uint32_t x, uint8_t n){ - return ((x>>n) | (x<<(32-n))); +uint32_t rotr32(uint32_t x, uint8_t n){ + return ((x >> n) | (x << (32 - n))); } static -uint32_t rotl32( uint32_t x, uint8_t n){ - return ((x<<n) | (x>>(32-n))); +uint32_t rotl32(uint32_t x, uint8_t n){ + return ((x << n) | (x >> (32 - n))); } - /*************************************************************************/ // #define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8)) static uint32_t change_endian32(uint32_t x){ - return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8)); + return (((x) << 24) | + ((x) >> 24) | + (((x) & 0x0000ff00) << 8) | + (((x) & 0x00ff0000) >> 8)); } /* sha256 functions as macros for speed and size, cause they are called only once */ -#define CH(x,y,z) (((x)&(y)) ^ ((~(x))&(z))) -#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z))) 
+#define CH(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) -#define SIGMA_0(x) (rotr32((x), 2) ^ rotr32((x),13) ^ rotl32((x),10)) -#define SIGMA_1(x) (rotr32((x), 6) ^ rotr32((x),11) ^ rotl32((x),7)) -#define SIGMA_a(x) (rotr32((x), 7) ^ rotl32((x),14) ^ ((x)>>3)) -#define SIGMA_b(x) (rotl32((x),15) ^ rotl32((x),13) ^ ((x)>>10)) +#define SIGMA_0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotl32((x), 10)) +#define SIGMA_1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotl32((x), 7)) +#define SIGMA_a(x) (rotr32((x), 7) ^ rotl32((x), 14) ^ ((x) >> 3)) +#define SIGMA_b(x) (rotl32((x), 15) ^ rotl32((x), 13) ^ ((x) >> 10)) const uint32_t k[] PROGMEM = { @@ -88,32 +92,35 @@ void sha2_small_common_nextBlock (sha2_small_common_ctx_t *state, const void *bl #elif defined BIG_ENDIAN memcpy((void*)w, block, 64); #endif -/* - for (i=16; i<64; ++i){ - w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; - } -*/ /* init working variables */ - memcpy((void*)a,(void*)(state->h), 8*4); + memcpy(&a[0], &state->h[0], sizeof(a)); /* do the, fun stuff, */ - for (i=0; i<64; ++i){ - if(i<16){ + for (i = 0; i < 64; ++i){ + if(i < 16){ wx = w[i]; }else{ - wx = SIGMA_b(w[14]) + w[9] + SIGMA_a(w[1]) + w[0]; - memmove(&(w[0]), &(w[1]), 15*4); - w[15] = wx; + wx = SIGMA_b(w[14]) + + w[9] + + SIGMA_a(w[1]) + + w[0]; + memmove(&w[0], &w[1], sizeof(w) - sizeof(w[0])); + w[15] = wx; } - t1 = a[7] + SIGMA_1(a[4]) + CH(a[4],a[5],a[6]) + pgm_read_dword(&k[i]) + wx; - t2 = SIGMA_0(a[0]) + MAJ(a[0],a[1],a[2]); - memmove(&(a[1]), &(a[0]), 7*4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + t1 = a[7] + + SIGMA_1(a[4]) + + CH(a[4], a[5], a[6]) + + pgm_read_dword(&k[i]) + wx; + t2 = SIGMA_0(a[0]) + + MAJ(a[0], a[1], a[2]); + t2 += t1; + memmove(&a[1], &a[0], sizeof(a) - sizeof(a[0])); a[4] += t1; - a[0] = t1 + t2; + a[0] = t2; } /* update, the, state, */ - for (i=0; i<8; ++i){ + for (i = 0; i < 8; ++i){ state->h[i] += 
a[i]; } state->length += 1; @@ -123,19 +130,19 @@ void sha2_small_common_nextBlock (sha2_small_common_ctx_t *state, const void *bl void sha2_small_common_lastBlock(sha2_small_common_ctx_t *state, const void *block, uint16_t length_b){ uint8_t lb[512/8]; /* local block */ uint64_t len; - while(length_b>=512){ + while(length_b >= 512){ sha2_small_common_nextBlock(state, block); length_b -= 512; - block = (uint8_t*)block+64; + block = (uint8_t*)block + 64; } - len = state->length*512 + length_b; + len = state->length * 512 + length_b; memset(lb, 0, 64); - memcpy(lb, block, (length_b+7)/8); + memcpy(lb, block, (length_b + 7) / 8); /* set the final one bit */ - lb[length_b/8] |= 0x80>>(length_b & 0x7); + lb[length_b / 8] |= 0x80 >> (length_b & 0x7); /* pad with zeros */ - if (length_b>=512-64){ /* not enouth space for 64bit length value */ + if (length_b >= 512 - 64){ /* not enough space for 64bit length value */ sha2_small_common_nextBlock(state, lb); memset(lb, 0, 64); } @@ -143,10 +150,10 @@ void sha2_small_common_lastBlock(sha2_small_common_ctx_t *state, const void *blo #if defined LITTLE_ENDIAN /* this is now rolled up */ uint8_t i; - i=7; - do{ - lb[63-i] = ((uint8_t*)&len)[i]; - }while(i--); + i = 7; + do { + lb[63 - i] = ((uint8_t*)&len)[i]; + } while(i--); #elif defined BIG_ENDIAN *((uint64_t)&(lb[56])) = len; #endif diff --git a/sha256/sha256.c b/sha256/sha256.c index 3ee4b93..e3e6c2c 100644 --- a/sha256/sha256.c +++ b/sha256/sha256.c @@ -31,6 +31,7 @@ #include #include /* for memcpy, memmove, memset */ +#include #include "sha256.h" #define LITTLE_ENDIAN @@ -44,9 +45,9 @@ /*************************************************************************/ -uint32_t sha256_init_vector[]={ - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, - 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +static const uint32_t sha256_init_vector[] PROGMEM = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; 
/*************************************************************************/ @@ -58,8 +59,8 @@ uint32_t sha256_init_vector[]={ * @return none */ void sha256_init(sha256_ctx_t *state){ - state->length=0; - memcpy(state->h, sha256_init_vector, 8*4); + state->length = 0; + memcpy_P(state->h, sha256_init_vector, 8 * 4); } /*************************************************************************/ @@ -68,7 +69,7 @@ void sha256_init(sha256_ctx_t *state){ * rotate x right by n positions */ uint32_t rotr32( uint32_t x, uint8_t n){ - return ((x>>n) | (x<<(32-n))); + return ((x >> n) | (x << (32 - n))); } @@ -77,7 +78,10 @@ uint32_t rotr32( uint32_t x, uint8_t n){ // #define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8)) uint32_t change_endian32(uint32_t x){ - return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8)); + return ( ((x) << 24) + | ((x) >> 24) + | (((x) & 0x0000ff00UL) << 8) + | (((x) & 0x00ff0000UL) >> 8) ); } @@ -85,66 +89,73 @@ uint32_t change_endian32(uint32_t x){ /* sha256 functions as macros for speed and size, cause they are called only once */ -#define CH(x,y,z) (((x)&(y)) ^ ((~(x))&(z))) -#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z))) +#define CH(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) -#define SIGMA0(x) (rotr32((x),2) ^ rotr32((x),13) ^ rotr32((x),22)) -#define SIGMA1(x) (rotr32((x),6) ^ rotr32((x),11) ^ rotr32((x),25)) -#define SIGMA_a(x) (rotr32((x),7) ^ rotr32((x),18) ^ ((x)>>3)) -#define SIGMA_b(x) (rotr32((x),17) ^ rotr32((x),19) ^ ((x)>>10)) +#define SIGMA0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22)) +#define SIGMA1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25)) +#define SIGMA_a(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3)) +#define SIGMA_b(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10)) -uint32_t k[]={ - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 
0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +static const uint32_t k[] PROGMEM = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL }; /*************************************************************************/ /** - * block must be, 512, Bit = 64, Byte, long !!! + * block must be 512 Bit = 64 Byte long !!! 
*/ void sha256_nextBlock (sha256_ctx_t *state, const void *block){ - uint32_t w[64]; /* this is 256, byte, large, */ + uint32_t w[16]; /* this is 64 Byte large, */ uint8_t i; - uint32_t a[8],t1,t2; + uint32_t a[8], t1, t2; - /* init w */ + /* init working variables */ + memcpy((void*)a,(void*)(state->h), 8 * 4); + + /* init w */ #if defined LITTLE_ENDIAN - for (i=0; i<16; ++i){ - w[i]= change_endian32(((uint32_t*)block)[i]); - } + for (i = 0; i < 16; ++i) { + w[i] = change_endian32(((uint32_t*)block)[i]); + } #elif defined BIG_ENDIAN memcpy((void*)w, block, 64); #endif - for (i=16; i<64; ++i){ - w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; - } - - /* init working variables */ - memcpy((void*)a,(void*)(state->h), 8*4); - +/* + for (i = 16; i < 64; ++i) { + w[i] = SIGMA_b(w[i - 2]) + w[i - 7] + SIGMA_a(w[i - 15]) + w[i - 16]; + } +*/ /* do the, fun stuff, */ - for (i=0; i<64; ++i){ - t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; - t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); - memmove(&(a[1]), &(a[0]), 7*4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ - a[4] += t1; - a[0] = t1 + t2; - } + for (i=0; i<64; ++i) { + if (i > 15) { + w[i % 16] = SIGMA_b(w[(i + 14) % 16]) + + w[(i + 9) % 16] + + SIGMA_a(w[(i + 1) % 16]) + + w[i % 16]; + } + t1 = a[7] + SIGMA1(a[4]) + CH(a[4], a[5], a[6]) + pgm_read_dword(&k[i]) + w[i % 16]; + t2 = SIGMA0(a[0]) + MAJ(a[0], a[1], a[2]); + memmove(&(a[1]), &(a[0]), 7 * 4); /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + a[4] += t1; + a[0] = t1 + t2; + } /* update, the, state, */ - for (i=0; i<8; ++i){ - state->h[i] += a[i]; - } - state->length += 512; + for (i = 0; i < 8; ++i){ + state->h[i] += a[i]; + } + state->length += 1; } @@ -159,39 +170,42 @@ void sha256_nextBlock (sha256_ctx_t *state, const void *block){ * if you have a message with bits at the end, the byte must be padded with zeros */ void sha256_lastBlock(sha256_ctx_t *state, 
const void *block, uint16_t length){ - uint8_t lb[SHA256_BLOCK_BITS/8]; /* local block */ + uint8_t lb[SHA256_BLOCK_BITS / 8]; /* local block */ + uint64_t msg_len; while(length>=SHA256_BLOCK_BITS){ sha256_nextBlock(state, block); length -= SHA256_BLOCK_BITS; - block = (uint8_t*)block+SHA256_BLOCK_BYTES; + block = (uint8_t*)block + SHA256_BLOCK_BYTES; } - state->length += length; - memcpy (&(lb[0]), block, length/8); + msg_len = state->length; + msg_len *= 512; + msg_len += length; + memcpy (&(lb[0]), block, length / 8); /* set the final one bit */ - if (length & 0x7){ // if we have single bits at the end - lb[length/8] = ((uint8_t*)(block))[length/8]; + if (length & 7){ // if we have single bits at the end + lb[length / 8] = ((uint8_t*)(block))[length / 8]; } else { - lb[length/8] = 0; + lb[length / 8] = 0; } - lb[length/8] |= 0x80>>(length & 0x7); - length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/ + lb[length / 8] |= 0x80 >> (length & 7); + length = (length / 8) + 1; /* from now on length contains the number of BYTES in lb*/ /* pad with zeros */ - if (length>64-8){ /* not enouth space for 64bit length value */ - memset((void*)(&(lb[length])), 0, 64-length); + if (length > 64 - 8){ /* not enouth space for 64bit length value */ + memset((void*)(&(lb[length])), 0, 64 - length); sha256_nextBlock(state, lb); - state->length -= 512; length = 0; } - memset((void*)(&(lb[length])), 0, 56-length); + memset((void*)(&(lb[length])), 0, 56 - length); /* store the 64bit length value */ #if defined LITTLE_ENDIAN - /* this is now rolled up */ - uint8_t i; - for (i=1; i<=8; ++i){ - lb[55+i] = (uint8_t)(state->length>>(64- 8*i)); - } + /* this is now rolled up */ + uint8_t i = 7; + do { + lb[56 + i] = msg_len & 0xff; + msg_len >>= 8; + } while (i--); #elif defined BIG_ENDIAN *((uint64_t)&(lb[56])) = state->length; #endif @@ -223,7 +237,7 @@ void sha256(sha256_hash_t *dest, const void *msg, uint32_t length){ /* length co void 
sha256_ctx2hash(sha256_hash_t *dest, const sha256_ctx_t *state){ #if defined LITTLE_ENDIAN uint8_t i; - for(i=0; i<8; ++i){ + for(i = 0; i < 8; ++i){ ((uint32_t*)dest)[i] = change_endian32(state->h[i]); } #elif BIG_ENDIAN diff --git a/sha256/sha256.h b/sha256/sha256.h index 78704f5..03f3a14 100644 --- a/sha256/sha256.h +++ b/sha256/sha256.h @@ -49,9 +49,9 @@ */ #define SHA256_HASH_BITS 256 -#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_HASH_BYTES (SHA256_HASH_BITS / 8) #define SHA256_BLOCK_BITS 512 -#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS / 8) /** \typedef sha256_ctx_t * \brief SHA-256 context type @@ -60,7 +60,7 @@ */ typedef struct { uint32_t h[8]; - uint64_t length; + uint32_t length; } sha256_ctx_t; /** \typedef sha256_hash_t -- 2.39.5