along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/*
+/*
* File: avr-asm-macros.S
* Author: Daniel Otte
* Date: 2008-08-13
* License: GPLv3 or later
* Description: some macros which are quite useful
- *
+ *
*/
-
+
#include <avr/io.h>
/*******************************************************************************
/*
 * push_: push every listed register, in the order given.
 * p1 (required) is pushed first; the remaining arguments (p2) are handled
 * by recursive expansion, which stops once p2 is blank.
 */
.macro push_ p1:req, p2:vararg
push \p1
-.ifnb \p2
+.ifnb \p2
push_ \p2
.endif
.endm
/*
 * pop_: pop into every listed register, in the order given.
 * p1 (required) is popped first; the remaining arguments (p2) are handled
 * by recursive expansion, which stops once p2 is blank.
 * To undo a push_ the caller must list the registers in reverse order.
 */
.macro pop_ p1:req, p2:vararg
pop \p1
-.ifnb \p2
+.ifnb \p2
pop_ \p2
.endif
.endm
push \from
.if \to-\from
push_range "(\from+1)",\to
-.endif
+.endif
.endm
/*
 * pop_range: pop a contiguous range of registers in descending order.
 * Register "to" is popped first; while to - from is non-zero the macro
 * expands itself again with to-1, so the last register popped is "from".
 * NOTE(review): assumes from <= to; with from > to the difference never
 * reaches zero - confirm that no caller relies on that case.
 */
.macro pop_range from:req, to:req
pop \to
.if \to-\from
- pop_range \from,"(\to-1)"
+ pop_range \from,"(\to-1)"
.endif
.endm
in r0, _SFR_IO_ADDR(SREG)
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
- sbiw \reg1, \size
+ sbiw \reg1, \size
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
in r0, _SFR_IO_ADDR(SREG)
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
- adiw \reg1, \size
+ adiw \reg1, \size
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
subi \reg1, lo8(\size)
- sbci \reg2, hi8(\size)
+ sbci \reg2, hi8(\size)
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
in \reg1, _SFR_IO_ADDR(SPL)
in \reg2, _SFR_IO_ADDR(SPH)
adiw \reg1, 63
- adiw \reg1, (\size-63)
+ adiw \reg1, (\size-63)
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
in \reg2, _SFR_IO_ADDR(SPH)
adiw \reg1, 63
adiw \reg1, 63
- adiw \reg1, (\size-63*2)
+ adiw \reg1, (\size-63*2)
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
add \reg1, r16
adc \reg2, r17
pop r17
- pop r16
+ pop r16
cli
out _SFR_IO_ADDR(SPH), \reg2
out _SFR_IO_ADDR(SREG), r0
q1 = 7
.global bmw_small_f1
bmw_small_f1:
- push_range 2, 7
- push_range 28, 29
+; push_range 2, 7
+; push_range 28, 29
push r16
movw q0, r24
movw m0, r22
cpi r16, 16
brne 1b
pop r16
- pop_range 28, 29
- pop_range 2, 7
+; pop_range 28, 29
+; pop_range 2, 7
ret
/*******************************************************************************
.global bmw_small_f0
bmw_small_f0:
- push_range 28, 29
- push_range 4, 11
- push_range 16, 17
+; push_range 28, 29
+; push_range 4, 11
+; push_range 16, 17
/* h[i] ^= m[i]; q[i]= 0 */
movw r26, h0 ; h
movw r30, m0 ; m
adc acc0, acc1
st Z+, acc0
- pop_range 16, 17
- pop_range 4, 11
- pop_range 28, 29
+; pop_range 16, 17
+; pop_range 4, 11
+; pop_range 28, 29
ret
/*******************************************************************************
st X+, r0
dec r18
brne 1b
- push_range 28, 29
- push_range 2, 17
+; push_range 28, 29
+; push_range 2, 17
movw q0, r22
movw h0, r24
/* calc xl */
rcall tshiftr
modify_h_2 5
bmw_small_f2_exit:
- pop_range 2, 17
- pop_range 28, 29
+; pop_range 2, 17
+; pop_range 28, 29
ret
cli_putb:
call cli_putc
pop_range 18, 31
ret
+
+/*******************************************************************************
+* void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){
+* uint32_t q[32];
+* dump_x(block, 16, 'M');
+* bmw_small_f0(ctx->h, block, q);
+* dump_x(q, 16, 'Q');
+* bmw_small_f1(q, block, ctx->h);
+* dump_x(q, 32, 'Q');
+* bmw_small_f2(ctx->h, q, block);
+* ctx->counter += 1;
+* ctx_dump(ctx);
+* }
+*
+* param ctx: r24:r25
+* param block: r22:r23
+*
+* Differences between this assembly version and the C reference above:
+* - the dump_x / ctx_dump debug calls are not performed;
+* - the counter is incremented before bmw_small_f0 is called, not after
+*   bmw_small_f2.
+*
+* Register aliases used below (set up right after the prologue):
+*   h0:h1 = ctx (passed on as ctx->h), b0:b1 = block, q0:q1 = address of q.
+* q is a 32*4 byte buffer taken from the stack and released before return.
+* r2..r17 and r28..r29 are saved on entry and restored on exit.
+* The code relies on r1 being zero (presumably the avr-gcc zero register
+* convention - confirm).
+*/
+h0 = 2 /* low register of the ctx / ctx->h pointer pair (r2:r3) */
+h1 = 3
+b0 = 4 /* low register of the block pointer pair (r4:r5) */
+b1 = 5
+q0 = 6 /* low register of the q pointer pair (r6:r7) */
+q1 = 7
+.global bmw_small_nextBlock
+bmw_small_nextBlock:
+ push_range 28, 29
+ push_range 2, 17
+ stack_alloc_large 32*4, 30, 31
+ adiw r30, 1 /* NOTE(review): presumably r30:r31 holds the new SP here, so +1 is the first byte of q - confirm against stack_alloc_large */
+ movw q0, r30 /* q0:q1 = q */
+ movw h0, r24 /* h0:h1 = ctx */
+ movw b0, r22 /* b0:b1 = block */
+ /* increment counter: add 1 to the 32-bit value at byte offset 64 of ctx
+  * (presumably ctx->counter, located after the 64 bytes of h).
+  * Z is first advanced by 60 because the ldd/std displacement cannot
+  * exceed 63. */
+ movw r30, r24 /* Z = ctx */
+ adiw r30, 60 /* Z = ctx + 60, so Z+4..Z+7 are bytes 64..67 of ctx */
+ ldd r22, Z+4
+ ldd r23, Z+5
+ ldd r24, Z+6
+ ldd r25, Z+7
+ ldi r21, 1
+ add r22, r21 /* low byte + 1 */
+ adc r23, r1 /* propagate the carry; r1 is expected to be zero */
+ adc r24, r1
+ adc r25, r1
+ std Z+4, r22
+ std Z+5, r23
+ std Z+6, r24
+ std Z+7, r25
+ /* call bmw_small_f0(ctx->h, block, q)
+  * The three pointers are also pushed (q, block, h - high byte first) so
+  * that they can be reloaded after the call; presumably the callee does
+  * not preserve r2..r7 - confirm. */
+ movw r24, h0
+ movw r22, b0
+ movw r20, q0
+ push_ q1, q0, b1, b0, h1, h0
+ rcall bmw_small_f0
+ /* call bmw_small_f1(q, block, ctx->h)
+  * The pops reload r20:r21 = h, r22:r23 = block, r24:r25 = q; the pushes
+  * then save h, q, block (in that order) for the next call. */
+ pop_ 20, 21, 22, 23, 24, 25,
+ push_ 21, 20, 25, 24, 23, 22
+ rcall bmw_small_f1
+ /* call bmw_small_f2(ctx->h, q, block)
+  * The pops reload r20:r21 = block, r22:r23 = q, r24:r25 = h. */
+ pop_ 20, 21, 22, 23, 24, 25,
+ rcall bmw_small_f2
+ stack_free_large3 32*4
+ pop_range 2, 17
+ pop_range 28, 29
+ ret
+
+
+
+
+
+
+
void bmw_small_f1(uint32_t* q, const void* m, const void* h);
void bmw_small_f0(uint32_t* h, const void* m, uint32_t* q);
void bmw_small_f2(uint32_t* h, uint32_t* q, const void* m);
+void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block);
/*
-static
-void bmw_small_f2(uint32_t* h, const uint32_t* q, const void* m){
- uint32_t xl=0, xh;
- uint8_t i;
- for(i=16;i<24;++i){
- xl ^= q[i];
- }
- xh = xl;
- for(i=24;i<32;++i){
- xh ^= q[i];
- }
-#if DEBUG
- cli_putstr_P(PSTR("\r\n XL = "));
- cli_hexdump_rev(&xl, 4);
- cli_putstr_P(PSTR("\r\n XH = "));
- cli_hexdump_rev(&xh, 4);
-#endif
- memcpy(h, m, 16*4);
- h[0] ^= SHL32(xh, 5) ^ SHR32(q[16], 5);
- h[5] ^= SHL32(xh, 6) ^ SHR32(q[21], 6);
- h[3] ^= SHR32(xh, 1) ^ SHL32(q[19], 5);
- h[4] ^= SHR32(xh, 3) ^ q[20];
- h[6] ^= SHR32(xh, 4) ^ SHL32(q[22], 6);
- h[2] ^= SHR32(xh, 5) ^ SHL32(q[18], 5);
- h[1] ^= SHR32(xh, 7) ^ SHL32(q[17], 8);
- h[7] ^= SHR32(xh,11) ^ SHL32(q[23], 2);
- for(i=0; i<8; ++i){
- h[i] += xl ^ q[24+i] ^ q[i];
- }
- for(i=0; i<8; ++i){
- h[8+i] ^= xh ^ q[24+i];
- h[8+i] += ROTL32(h[(4+i)%8],i+9);
- }
- h[11] += SHL32(xl, 4) ^ q[18] ^ q[11];
- h[10] += SHL32(xl, 6) ^ q[17] ^ q[10];
- h[ 8] += SHL32(xl, 8) ^ q[23] ^ q[ 8];
- h[15] += SHR32(xl, 2) ^ q[22] ^ q[15];
- h[12] += SHR32(xl, 3) ^ q[19] ^ q[12];
- h[13] += SHR32(xl, 4) ^ q[20] ^ q[13];
- h[ 9] += SHR32(xl, 6) ^ q[16] ^ q[ 9];
- h[14] += SHR32(xl, 7) ^ q[21] ^ q[14];
-}
-*/
void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){
uint32_t q[32];
dump_x(block, 16, 'M');
ctx->counter += 1;
ctx_dump(ctx);
}
+*/
/*
 * Absorb the remaining message data, append the padding and the message
 * length, and run the final compression step.
 *
 * ctx      hash context (h and counter are updated)
 * block    remaining message data
 * length_b length of that data in bits (it is used as a bit count below:
 *          length_b>>3 selects the byte, length_b&0x07 the bit)
 */
void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b){
- uint8_t buffer[64];
+ struct {
+ uint8_t buffer[64];
+ uint32_t ctr;
+ } pctx; /* NOTE(review): presumably laid out like bmw_small_ctx_t (64 bytes + 32-bit counter) so it can be passed to bmw_small_nextBlock below - confirm */
 /* consume all complete blocks first */
while(length_b >= BMW_SMALL_BLOCKSIZE){
bmw_small_nextBlock(ctx, block);
length_b -= BMW_SMALL_BLOCKSIZE;
block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
}
- memset(buffer, 0, 64);
- memcpy(buffer, block, (length_b+7)/8);
- buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
+ memset(pctx.buffer, 0, 64);
+ memcpy(pctx.buffer, block, (length_b+7)/8); /* copy the remaining bits, rounded up to whole bytes */
+ pctx.buffer[length_b>>3] |= 0x80 >> (length_b&0x07); /* set the padding bit right after the last message bit */
 /* the padding bit plus the 64-bit length field need length_b+1 <= 448;
    otherwise an additional block is processed first */
if(length_b+1>64*8-64){
- bmw_small_nextBlock(ctx, buffer);
- memset(buffer, 0, 64-8);
+ bmw_small_nextBlock(ctx, pctx.buffer);
+ memset(pctx.buffer, 0, 64-8);
ctx->counter -= 1; /* undo the counter increment of the extra block so it does not enter the length below */
}
- *((uint64_t*)&(buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b;
- bmw_small_nextBlock(ctx, buffer);
+ *((uint64_t*)&(pctx.buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b; /* last 8 bytes: counter*512 + length_b */
+ bmw_small_nextBlock(ctx, pctx.buffer);
 /* final step: fill the buffer with 0xaa bytes, set the first byte of each
    4-byte group to 0xa0+i, then run one more block with the roles swapped */
uint8_t i;
- uint32_t q[32];
- memset(buffer, 0xaa, 64);
+ memset(pctx.buffer, 0xaa, 64);
for(i=0; i<16;++i){
- buffer[i*4] = i+0xa0;
+ pctx.buffer[i*4] = i+0xa0;
}
-// dump_x(buffer, 16, 'A');
- dump_x(ctx->h, 16, 'M');
- bmw_small_f0((uint32_t*)buffer, ctx->h, q);
- dump_x(buffer, 16, 'a');
- dump_x(q, 16, 'Q');
- bmw_small_f1(q, ctx->h, (uint32_t*)buffer);
- dump_x(q, 32, 'Q');
- bmw_small_f2((uint32_t*)buffer, q, ctx->h);
- memcpy(ctx->h, buffer, 64);
+ bmw_small_nextBlock((bmw_small_ctx_t*)&pctx, ctx->h); /* pctx acts as the context, ctx->h as the message block */
+ memcpy(ctx->h, pctx.buffer, 64); /* the result in pctx.buffer becomes the new ctx->h */
}
void bmw224_init(bmw224_ctx_t* ctx){