X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=keccak%2Fkeccak-asm.S;h=459994ae31d122f9ba135fe1135360f300f00f1b;hp=7b7c6cf3144bb4374831c4cb5dbb1ecb9f66d420;hb=eb0cafe05ab4cdf60878dbd81e4ff3712d5150f2;hpb=628319e6c3018268ef1c307976a0e81e4dc549b8 diff --git a/keccak/keccak-asm.S b/keccak/keccak-asm.S index 7b7c6cf..459994a 100644 --- a/keccak/keccak-asm.S +++ b/keccak/keccak-asm.S @@ -34,8 +34,8 @@ /* typedef struct{ uint64_t a[5][5]; - uint16_t r, c; - uint8_t d, bs; + uint16_t r; + uint8_t bs; } keccak_ctx_t; */ .struct 0 @@ -429,8 +429,6 @@ keccak_f1600: subi XL, lo8(4 * 5 * 8 + 8) sbci XH, hi8(4 * 5 * 8 + 8) rcall theta_2b - -; ret /* -- rho & pi -- for(i = 0; i < 5; ++i){ @@ -442,8 +440,8 @@ keccak_f1600: -- or -- - const uint8_t* rot_code = (const uint8_t*)keccak_rotate_codes; - const uint8_t* idx_idx = (const uint8_t*)rho_pi_idx_table; + const uint8_t *rot_code = (const uint8_t*)keccak_rotate_codes; + const uint8_t *idx_idx = (const uint8_t*)rho_pi_idx_table; uint64_t *a_tmp = (uint64_t*)a; for(i = 0; i < 25; ++i){ *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) = @@ -622,8 +620,44 @@ icall_r16_times: ret .endfunc + + .global keccak224_ctx2hash + .func keccak224_ctx2hash +keccak224_ctx2hash: + movw r20, r22 + ldi r22, lo8(224) + ldi r23, hi8(224) + rjmp keccak_ctx2hash + .endfunc + + .global keccak384_ctx2hash + .func keccak384_ctx2hash +keccak384_ctx2hash: + movw r20, r22 + ldi r22, lo8(384) + ldi r23, hi8(384) + rjmp keccak_ctx2hash + .endfunc + + .global keccak512_ctx2hash + .func keccak512_ctx2hash +keccak512_ctx2hash: + movw r20, r22 + ldi r22, lo8(512) + ldi r23, hi8(512) + rjmp keccak_ctx2hash + .endfunc + + .global keccak256_ctx2hash + .func keccak256_ctx2hash +keccak256_ctx2hash: + movw r20, r22 + ldi r22, lo8(256) + ldi r23, hi8(256) + .endfunc + /* -void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){ +void keccak_ctx2hash(void *dest, uint16_t length_b, keccak_ctx_t *ctx){ while(length_b>=ctx->r){ memcpy(dest, ctx->a, ctx->bs); dest = (uint8_t*)dest + ctx->bs; @@ -633,13 +667,13 @@ void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){ memcpy(dest, ctx->a, (length_b+7)/8); } */ -; .global keccak_ctx2hash -; .func keccak_ctx2hash -;keccak_ctx2hash: + .global keccak_ctx2hash + .func keccak_ctx2hash +keccak_ctx2hash: push_range 2, 10 movw r4, r20 movw r6, r24 - movw ZL, r24 + movw ZL, r20 movw r8, r22 subi ZL, lo8(-ctx_r) sbci ZH, hi8(-ctx_r) @@ -650,7 +684,6 @@ void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){ ; length_b = (r9:r8) ; r = (r3:r2) ; (H:L) cp r2, r8 cpc r3, r9 - rjmp 40f brsh 40f movw XL, r4 movw ZL, r6 @@ -687,5 +720,191 @@ void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){ 99: pop_range 2, 10 ret -; .endfunc + .endfunc + + + .global keccak224_init + .func keccak224_init +keccak224_init: + movw XL, r24 + ldi r24, lo8(1152) + ldi r25, hi8(1152) + rjmp keccak_init_1 + .endfunc + + .global keccak384_init + .func keccak384_init +keccak384_init: + movw XL, r24 + ldi r24, lo8( 832) + ldi r25, hi8( 832) + rjmp keccak_init_1 + .endfunc + + .global keccak512_init + .func keccak512_init +keccak512_init: + movw XL, r24 + ldi r24, lo8( 576) + ldi r25, hi8( 576) + rjmp keccak_init_1 + .endfunc + + .global keccak256_init + .func keccak256_init +keccak256_init: + movw r22, r24 + ldi r24, lo8(1088) + ldi r25, hi8(1088) + .endfunc +/* +void keccak_init(uint16_t r, keccak_ctx_t *ctx){ + memset(ctx->a, 0x00, 5 * 5 * 8); + ctx->r = r; + ctx->bs = (uint8_t)(r / 8); +} +*/ + .global keccak_init + .func keccak_init +keccak_init: + movw XL, r22 +keccak_init_1: + ldi r22, 200 +10: + st X+, __zero_reg__ + dec r22 + brne 10b + st X+, r24 + st X+, r25 + lsr r25 + ror r24 + lsr r25 + ror r24 + lsr r25 + ror r24 + st X+, r24 + ret + .endfunc + +/* +void keccak_lastBlock(keccak_ctx_t *ctx, const void *block, uint16_t length_b){ + uint8_t length_B; + uint8_t t; + while(length_b >= ctx->r){ + keccak_nextBlock(ctx, block); + block = (uint8_t*)block + ctx->bs; + length_b -= ctx->r; + } + length_B = length_b / 8; + memxor(ctx->a, block, length_B); + / * append 1 * / + if(length_b & 7){ + / * we have some single bits * / + t = ((uint8_t*)block)[length_B] >> (8 - (length_b & 7)); + t |= 0x01 << (length_b & 7); + }else{ + t = 0x01; + } + ctx->a[length_B] ^= t; + if(length_b == ctx->r - 1){ + keccak_f1600(ctx->a); + } + +*/ +.set length_b_l, 2 +.set length_b_h, 3 +.set pbs, 10 +.set pr_l, 8 +.set pr_h, 9 +.set ctx_l, 6 +.set ctx_h, 7 + + .global keccak_lastBlock + .func keccak_lastBlock +keccak_lastBlock: + push_range 2, 10 + movw r2, r20 + movw r4, r22 + movw r6, r24 + movw XL, r24 + subi XL, lo8(-ctx_r) + sbci XH, hi8(-ctx_r) + ld pr_l, X+ + ld pr_h, X+ + ld pbs, X +10: + cp length_b_l, pr_l + cpc length_b_h, pr_h + brlo 20f + movw r24, ctx_l + movw r22, r4 + rcall keccak_nextBlock + add r4, pbs + adc r5, __zero_reg__ + sub length_b_l, pr_l + sbc length_b_h, pr_h + rjmp 10b +20: + movw ZL, ctx_l + movw XL, r4 + movw r22, length_b_l + lsr r23 + ror r22 + lsr r23 + ror r22 + lsr r23 + ror r22 + mov r23, r22 + breq 20f +10: + ld r25, X+ + ld r24, Z + eor r24, r25 + st Z+, r24 + dec r23 + brne 10b +20: + ldi r25, 1 + mov r18, length_b_l + andi r18, 7 + breq 30f + /* we have trailing bits */ + mov r19, r18 + ld r24, X+ + subi r18, 8 + neg r18 +10: + lsr r24 + dec r18 + brne 10b +10: + lsl r25 + dec r19 + brne 10b + or r25, r24 +30: + ld r24, Z + eor r24, r25 + st Z, r24 + + movw r24, pr_l + sbiw r24, 1 + cp length_b_l, r24 + cpc length_b_h, r25 + brne 20f + movw r24, ctx_l + rcall keccak_f1600 +20: + movw XL, ctx_l + dec pbs + add XL, pbs + adc XH, __zero_reg__ + ld r24, X + ldi r25, 0x80 + eor r24, r25 + st X, r24 + movw r24, ctx_l + pop_range 2, 10 + rjmp keccak_f1600 + .endfunc