#include <avr/pgmspace.h>
#include "bmw_large.h"
-#define SHL64(a,n) ((a)<<(n))
-#define SHR64(a,n) ((a)>>(n))
-#define ROTL64(a,n) (((a)<<(n))|((a)>>(64-(n))))
-#define ROTR64(a,n) (((a)>>(n))|((a)<<(64-(n))))
+#define SHL64(a,n) shiftl64(a,n)
+#define SHR64(a,n) shiftr64(a,n)
+#define ROTL64(a,n) rotl64(a,n)
+#define ROTR64(a,n) rotr64(a,n)
#define TWEAK 1
#define BUG24 0
#define dump_x(a,b,c)
#endif
/*
 * Rotate a 64-bit word left by r bits.
 * The complementary shift amount is masked to 0..63 so that r == 0
 * does not produce a shift by 64, which is undefined behavior in C
 * (CERT INT34-C). For r in 1..63 the result is identical to the
 * former ROTL64 macro. r must be < 64.
 */
static
uint64_t rotl64(uint64_t a, uint8_t r){
	return (a << r) | (a >> ((64 - r) & 63));
}
+
/*
 * Rotate a 64-bit word right by r bits.
 * The complementary shift amount is masked to 0..63 so that r == 0
 * does not produce a shift by 64, which is undefined behavior in C
 * (CERT INT34-C). For r in 1..63 the result is identical to the
 * former ROTR64 macro. r must be < 64.
 */
static
uint64_t rotr64(uint64_t a, uint8_t r){
	return (a >> r) | (a << ((64 - r) & 63));
}
+
/*
 * Logical left shift of a 64-bit word by r bits (function form of the
 * old SHL64 macro, so the compiler emits one out-of-line 64-bit shift
 * instead of inlining it at every call site on AVR). r must be < 64.
 */
static
uint64_t shiftl64(uint64_t value, uint8_t amount){
	uint64_t shifted = value << amount;
	return shifted;
}
+
/*
 * Logical right shift of a 64-bit word by r bits (function form of the
 * old SHR64 macro, so the compiler emits one out-of-line 64-bit shift
 * instead of inlining it at every call site on AVR). r must be < 64.
 */
static
uint64_t shiftr64(uint64_t value, uint8_t amount){
	uint64_t shifted = value >> amount;
	return shifted;
}
+
static
uint64_t bmw_large_s0(uint64_t x){
uint64_t r;
#define K 0x0555555555555555LL
#define MASK 0xFFFFFFFFFFFFFFFFLL
static
-uint64_t k_lut[] PROGMEM = {
+const uint64_t k_lut[] PROGMEM = {
16LL*K, 17LL*K, 18LL*K, 19LL*K,
20LL*K, 21LL*K, 22LL*K, 23LL*K,
24LL*K, 25LL*K, 26LL*K, 27LL*K,
*/
/* the same as above but precomputed to avoid compiler warnings */
static
-uint64_t k_lut[] PROGMEM = {
+const uint64_t k_lut[] PROGMEM = {
0x5555555555555550LL, 0x5aaaaaaaaaaaaaa5LL, 0x5ffffffffffffffaLL,
0x655555555555554fLL, 0x6aaaaaaaaaaaaaa4LL, 0x6ffffffffffffff9LL,
0x755555555555554eLL, 0x7aaaaaaaaaaaaaa3LL, 0x7ffffffffffffff8LL,
#if F0_HACK==2
/* to understand this implementation take a look at f0-opt-table.txt */
-static uint16_t hack_table[5] PROGMEM = { 0x0311, 0xDDB3, 0x2A79, 0x07AA, 0x51C2 };
-static uint8_t offset_table[5] PROGMEM = { 4+16, 6+16, 9+16, 12+16, 13+16 };
+static const uint16_t hack_table[5] PROGMEM = { 0x0311, 0xDDB3, 0x2A79, 0x07AA, 0x51C2 };
+static const uint8_t offset_table[5] PROGMEM = { 4+16, 6+16, 9+16, 12+16, 13+16 };
static
#if F0_HACK==1
static
-uint8_t f0_lut[] PROGMEM ={
+const uint8_t f0_lut[] PROGMEM ={
5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
}
/*
 * Absorb the final (possibly partial) message block, apply the
 * Merkle-Damgaard padding and run the finalization, leaving the digest
 * state in ctx->h.
 *
 *  ctx      - hash context; ctx->counter holds the number of full
 *             128-byte blocks already processed
 *  block    - pointer to the trailing message data
 *  length_b - length of the trailing data in BITS
 */
void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b){
	/* a union gives byte-wise and 64-bit-word views of the padding
	 * buffer without violating strict aliasing */
	union {
		uint8_t  v8[128];
		uint64_t v64[16];
	} pad;
	/* first consume any remaining full blocks */
	while(length_b >= BMW_LARGE_BLOCKSIZE){
		bmw_large_nextBlock(ctx, block);
		length_b -= BMW_LARGE_BLOCKSIZE;
		block = (uint8_t*)block + BMW_LARGE_BLOCKSIZE_B;
	}
	memset(pad.v8, 0, 128);
	memcpy(pad.v8, block, (length_b+7)/8);
	/* append the single padding 1-bit directly after the message */
	pad.v8[length_b>>3] |= 0x80 >> (length_b&0x07);
	if(length_b+1>128*8-64){
		/* no room left for the 64-bit length field: emit an extra
		 * block; the counter is rewound so the length stays correct */
		bmw_large_nextBlock(ctx, pad.v8);
		memset(pad.v8, 0, 128-8);
		ctx->counter -= 1;
	}
	/* total message length in bits goes into the last 64-bit word */
	pad.v64[15] = (uint64_t)(ctx->counter*1024LL)+(uint64_t)length_b;
	bmw_large_nextBlock(ctx, pad.v8);
#if TWEAK
	/* finalization pass over a fixed block (0xaaa1...a0 pattern);
	 * NOTE(review): this is presumably the BMW round-2 tweak -- confirm
	 * against the reference specification */
	uint8_t j;
	uint64_t q[32];
	memset(pad.v8, 0xaa, 128);
	for(j=0; j<16; ++j){
		pad.v8[8*j] = j + 0xa0;
	}
	bmw_large_f0(q, pad.v64, ctx->h);
	bmw_large_f1(q, ctx->h, pad.v64);
	bmw_large_f2(pad.v64, q, ctx->h);
	memcpy(ctx->h, pad.v8, 128);
#endif
}