From f18cfec99ce8caf8878a2d87acc69986e08bf788 Mon Sep 17 00:00:00 2001 From: bg Date: Sat, 12 Dec 2009 21:05:01 +0000 Subject: [PATCH] some improvments for BMW --- avr-asm-macros.S | 26 +++++------ bmw/bmw_small-asm.S | 100 ++++++++++++++++++++++++++++++++++++------ bmw/bmw_small-cstub.c | 80 +++++++-------------------------- 3 files changed, 116 insertions(+), 90 deletions(-) diff --git a/avr-asm-macros.S b/avr-asm-macros.S index 829562b..63f9303 100644 --- a/avr-asm-macros.S +++ b/avr-asm-macros.S @@ -17,15 +17,15 @@ along with this program. If not, see . */ -/* +/* * File: avr-asm-macros.S * Author: Daniel Otte * Date: 2008-08-13 * License: GPLv3 or later * Description: some macros which are quite usefull - * + * */ - + #include /******************************************************************************* @@ -34,14 +34,14 @@ .macro push_ p1:req, p2:vararg push \p1 -.ifnb \p2 +.ifnb \p2 push_ \p2 .endif .endm .macro pop_ p1:req, p2:vararg pop \p1 -.ifnb \p2 +.ifnb \p2 pop_ \p2 .endif .endm @@ -50,13 +50,13 @@ push \from .if \to-\from push_range "(\from+1)",\to -.endif +.endif .endm .macro pop_range from:req, to:req pop \to .if \to-\from - pop_range \from,"(\to-1)" + pop_range \from,"(\to-1)" .endif .endm @@ -64,7 +64,7 @@ in r0, _SFR_IO_ADDR(SREG) in \reg1, _SFR_IO_ADDR(SPL) in \reg2, _SFR_IO_ADDR(SPH) - sbiw \reg1, \size + sbiw \reg1, \size cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 @@ -75,7 +75,7 @@ in r0, _SFR_IO_ADDR(SREG) in \reg1, _SFR_IO_ADDR(SPL) in \reg2, _SFR_IO_ADDR(SPH) - adiw \reg1, \size + adiw \reg1, \size cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 @@ -88,7 +88,7 @@ in \reg1, _SFR_IO_ADDR(SPL) in \reg2, _SFR_IO_ADDR(SPH) subi \reg1, lo8(\size) - sbci \reg2, hi8(\size) + sbci \reg2, hi8(\size) cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 @@ -100,7 +100,7 @@ in \reg1, _SFR_IO_ADDR(SPL) in \reg2, _SFR_IO_ADDR(SPH) adiw \reg1, 63 - adiw \reg1, (\size-63) + adiw \reg1, (\size-63) cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 @@ -113,7 +113,7 @@ in \reg2, _SFR_IO_ADDR(SPH) adiw \reg1, 63 adiw \reg1, 63 - adiw \reg1, (\size-63*2) + adiw \reg1, (\size-63*2) cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 @@ -131,7 +131,7 @@ add \reg1, r16 adc \reg2, r17 pop r17 - pop r16 + pop r16 cli out _SFR_IO_ADDR(SPH), \reg2 out _SFR_IO_ADDR(SREG), r0 diff --git a/bmw/bmw_small-asm.S b/bmw/bmw_small-asm.S index 62bd166..ef05e96 100644 --- a/bmw/bmw_small-asm.S +++ b/bmw/bmw_small-asm.S @@ -800,8 +800,8 @@ q0 = 6 q1 = 7 .global bmw_small_f1 bmw_small_f1: - push_range 2, 7 - push_range 28, 29 +; push_range 2, 7 +; push_range 28, 29 push r16 movw q0, r24 movw m0, r22 @@ -843,8 +843,8 @@ bmw_small_f1: cpi r16, 16 brne 1b pop r16 - pop_range 28, 29 - pop_range 2, 7 +; pop_range 28, 29 +; pop_range 2, 7 ret /******************************************************************************* @@ -960,9 +960,9 @@ f0_jumptable: .global bmw_small_f0 bmw_small_f0: - push_range 28, 29 - push_range 4, 11 - push_range 16, 17 +; push_range 28, 29 +; push_range 4, 11 +; push_range 16, 17 /* h[i] ^= m[i]; q[i]= 0 */ movw r26, h0 ; h movw r30, m0 ; m @@ -1105,9 +1105,9 @@ bmw_small_f0: adc acc0, acc1 st Z+, acc0 - pop_range 16, 17 - pop_range 4, 11 - pop_range 28, 29 +; pop_range 16, 17 +; pop_range 4, 11 +; pop_range 28, 29 ret /******************************************************************************* @@ -1220,8 +1220,8 @@ bmw_small_f2: st X+, r0 dec r18 brne 1b - push_range 28, 29 - push_range 2, 17 +; push_range 28, 29 +; push_range 2, 17 movw q0, r22 movw h0, r24 /* calc xl */ @@ -1655,8 +1655,8 @@ bmw_small_f2: rcall tshiftr modify_h_2 5 bmw_small_f2_exit: - pop_range 2, 17 - pop_range 28, 29 +; pop_range 2, 17 +; pop_range 28, 29 ret cli_putb: @@ -1695,3 +1695,75 @@ cli_putchar: call cli_putc pop_range 18, 31 ret + +/******************************************************************************* +* void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){ +* uint32_t q[32]; +* dump_x(block, 16, 'M'); +* bmw_small_f0(ctx->h, block, q); +* dump_x(q, 16, 'Q'); +* bmw_small_f1(q, block, ctx->h); +* dump_x(q, 32, 'Q'); +* bmw_small_f2(ctx->h, q, block); +* ctx->counter += 1; +* ctx_dump(ctx); +* } +* +* param ctx: r24:r25 +* param block: r22:r23 +*/ +h0 = 2 +h1 = 3 +b0 = 4 +b1 = 5 +q0 = 6 +q1 = 7 +.global bmw_small_nextBlock +bmw_small_nextBlock: + push_range 28, 29 + push_range 2, 17 + stack_alloc_large 32*4, 30, 31 + adiw r30, 1 + movw q0, r30 + movw h0, r24 + movw b0, r22 + /* increment counter */ + movw r30, r24 + adiw r30, 60 + ldd r22, Z+4 + ldd r23, Z+5 + ldd r24, Z+6 + ldd r25, Z+7 + ldi r21, 1 + add r22, r21 + adc r23, r1 + adc r24, r1 + adc r25, r1 + std Z+4, r22 + std Z+5, r23 + std Z+6, r24 + std Z+7, r25 + /* call bmw_small_f0(ctx->h, block, q) */ + movw r24, h0 + movw r22, b0 + movw r20, q0 + push_ q1, q0, b1, b0, h1, h0 + rcall bmw_small_f0 + /* call bmw_small_f1(q, block, ctx->h) */ + pop_ 20, 21, 22, 23, 24, 25, + push_ 21, 20, 25, 24, 23, 22 + rcall bmw_small_f1 + /* call bmw_small_f2(ctx->h, q, block) */ + pop_ 20, 21, 22, 23, 24, 25, + rcall bmw_small_f2 + stack_free_large3 32*4 + pop_range 2, 17 + pop_range 28, 29 + ret + + + + + + + diff --git a/bmw/bmw_small-cstub.c b/bmw/bmw_small-cstub.c index af26144..5b34145 100644 --- a/bmw/bmw_small-cstub.c +++ b/bmw/bmw_small-cstub.c @@ -77,51 +77,9 @@ void bmw_small_f1(uint32_t* q, const void* m, const void* h); void bmw_small_f0(uint32_t* h, const void* m, uint32_t* q); void bmw_small_f2(uint32_t* h, uint32_t* q, const void* m); +void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block); /* -static -void bmw_small_f2(uint32_t* h, const uint32_t* q, const void* m){ - uint32_t xl=0, xh; - uint8_t i; - for(i=16;i<24;++i){ - xl ^= q[i]; - } - xh = xl; - for(i=24;i<32;++i){ - xh ^= q[i]; - } -#if DEBUG - cli_putstr_P(PSTR("\r\n XL = ")); - cli_hexdump_rev(&xl, 4); - cli_putstr_P(PSTR("\r\n XH = ")); - cli_hexdump_rev(&xh, 4); -#endif - memcpy(h, m, 16*4); - h[0] ^= SHL32(xh, 5) ^ SHR32(q[16], 5); - h[5] ^= SHL32(xh, 6) ^ SHR32(q[21], 6); - h[3] ^= SHR32(xh, 1) ^ SHL32(q[19], 5); - h[4] ^= SHR32(xh, 3) ^ q[20]; - h[6] ^= SHR32(xh, 4) ^ SHL32(q[22], 6); - h[2] ^= SHR32(xh, 5) ^ SHL32(q[18], 5); - h[1] ^= SHR32(xh, 7) ^ SHL32(q[17], 8); - h[7] ^= SHR32(xh,11) ^ SHL32(q[23], 2); - for(i=0; i<8; ++i){ - h[i] += xl ^ q[24+i] ^ q[i]; - } - for(i=0; i<8; ++i){ - h[8+i] ^= xh ^ q[24+i]; - h[8+i] += ROTL32(h[(4+i)%8],i+9); - } - h[11] += SHL32(xl, 4) ^ q[18] ^ q[11]; - h[10] += SHL32(xl, 6) ^ q[17] ^ q[10]; - h[ 8] += SHL32(xl, 8) ^ q[23] ^ q[ 8]; - h[15] += SHR32(xl, 2) ^ q[22] ^ q[15]; - h[12] += SHR32(xl, 3) ^ q[19] ^ q[12]; - h[13] += SHR32(xl, 4) ^ q[20] ^ q[13]; - h[ 9] += SHR32(xl, 6) ^ q[16] ^ q[ 9]; - h[14] += SHR32(xl, 7) ^ q[21] ^ q[14]; -} -*/ void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){ uint32_t q[32]; dump_x(block, 16, 'M'); @@ -133,39 +91,35 @@ void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){ ctx->counter += 1; ctx_dump(ctx); } +*/ void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b){ - uint8_t buffer[64]; + struct { + uint8_t buffer[64]; + uint32_t ctr; + } pctx; while(length_b >= BMW_SMALL_BLOCKSIZE){ bmw_small_nextBlock(ctx, block); length_b -= BMW_SMALL_BLOCKSIZE; block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B; } - memset(buffer, 0, 64); - memcpy(buffer, block, (length_b+7)/8); - buffer[length_b>>3] |= 0x80 >> (length_b&0x07); + memset(pctx.buffer, 0, 64); + memcpy(pctx.buffer, block, (length_b+7)/8); + pctx.buffer[length_b>>3] |= 0x80 >> (length_b&0x07); if(length_b+1>64*8-64){ - bmw_small_nextBlock(ctx, buffer); - memset(buffer, 0, 64-8); + bmw_small_nextBlock(ctx, pctx.buffer); + memset(pctx.buffer, 0, 64-8); ctx->counter -= 1; } - *((uint64_t*)&(buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b; - bmw_small_nextBlock(ctx, buffer); + *((uint64_t*)&(pctx.buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b; + bmw_small_nextBlock(ctx, pctx.buffer); uint8_t i; - uint32_t q[32]; - memset(buffer, 0xaa, 64); + memset(pctx.buffer, 0xaa, 64); for(i=0; i<16;++i){ - buffer[i*4] = i+0xa0; + pctx.buffer[i*4] = i+0xa0; } -// dump_x(buffer, 16, 'A'); - dump_x(ctx->h, 16, 'M'); - bmw_small_f0((uint32_t*)buffer, ctx->h, q); - dump_x(buffer, 16, 'a'); - dump_x(q, 16, 'Q'); - bmw_small_f1(q, ctx->h, (uint32_t*)buffer); - dump_x(q, 32, 'Q'); - bmw_small_f2((uint32_t*)buffer, q, ctx->h); - memcpy(ctx->h, buffer, 64); + bmw_small_nextBlock((bmw_small_ctx_t*)&pctx, ctx->h); + memcpy(ctx->h, pctx.buffer, 64); } void bmw224_init(bmw224_ctx_t* ctx){ -- 2.39.5