X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=keccak%2Fkeccak-asm.S;h=459994ae31d122f9ba135fe1135360f300f00f1b;hp=3b3a48818fdc84a47547b64d61e6b00ad1eca3a9;hb=eb0cafe05ab4cdf60878dbd81e4ff3712d5150f2;hpb=29a44972ae3749a6a273d936f2e15327ecae8a94 diff --git a/keccak/keccak-asm.S b/keccak/keccak-asm.S index 3b3a488..459994a 100644 --- a/keccak/keccak-asm.S +++ b/keccak/keccak-asm.S @@ -34,8 +34,8 @@ /* typedef struct{ uint64_t a[5][5]; - uint16_t r, c; - uint8_t d, bs; + uint16_t r; + uint8_t bs; } keccak_ctx_t; */ .struct 0 @@ -43,10 +43,6 @@ ctx_a: .struct ctx_a + 8 * 5 * 5 ctx_r: .struct ctx_r + 2 -ctx_c: - .struct ctx_c + 2 -ctx_d: - .struct ctx_d + 1 ctx_bs: .section .text @@ -329,7 +325,7 @@ chi_step: keccak_nextBlock: movw ZL, r24 subi ZL, lo8(-ctx_bs) - sbci ZL, hi8(-ctx_bs) + sbci ZH, hi8(-ctx_bs) ld r20, Z movw XL, r24 movw ZL, r22 @@ -340,8 +336,10 @@ keccak_nextBlock: st X+, r22 dec r20 brne 10b + .endfunc .global keccak_f1600 + .func keccak_f1600 keccak_f1600: push_range 2, 9 push r16 @@ -431,8 +429,6 @@ keccak_f1600: subi XL, lo8(4 * 5 * 8 + 8) sbci XH, hi8(4 * 5 * 8 + 8) rcall theta_2b - -; ret /* -- rho & pi -- for(i = 0; i < 5; ++i){ @@ -444,8 +440,8 @@ keccak_f1600: -- or -- - const uint8_t* rot_code = (const uint8_t*)keccak_rotate_codes; - const uint8_t* idx_idx = (const uint8_t*)rho_pi_idx_table; + const uint8_t *rot_code = (const uint8_t*)keccak_rotate_codes; + const uint8_t *idx_idx = (const uint8_t*)rho_pi_idx_table; uint64_t *a_tmp = (uint64_t*)a; for(i = 0; i < 25; ++i){ *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) = @@ -542,15 +538,17 @@ icall_r16_times: */ ; memcpy(a, b, 200) ; X points at b + 32 + 8 = b + 40 = b[1][0] has to point to b[0][0] - ldi r16, 200 + ldi r16, 200 / 8 sbiw XL, 5 * 8 movw ZL, XL subi YL, lo8(5 * 5 * 8) sbci YH, hi8(5 * 5 * 8) movw r2, YL 10: + .rept 8 ld r22, X+ st Y+, r22 + .endr dec r16 brne 10b @@ -619,5 +617,294 @@ icall_r16_times: pop_range 28, 29 pop r16 pop_range 2, 9 
+	ret
+	.endfunc
+
+
+	.global keccak224_ctx2hash
+	.func keccak224_ctx2hash
+keccak224_ctx2hash: ; in: r25:r24 = dest, r23:r22 = ctx
+	movw r20, r22 ; ctx becomes the third argument of keccak_ctx2hash
+	ldi r22, lo8(224) ; length_b = 224
+	ldi r23, hi8(224)
+	rjmp keccak_ctx2hash
+	.endfunc
+
+	.global keccak384_ctx2hash
+	.func keccak384_ctx2hash
+keccak384_ctx2hash: ; in: r25:r24 = dest, r23:r22 = ctx
+	movw r20, r22 ; ctx becomes the third argument of keccak_ctx2hash
+	ldi r22, lo8(384) ; length_b = 384
+	ldi r23, hi8(384)
+	rjmp keccak_ctx2hash
+	.endfunc
+
+	.global keccak512_ctx2hash
+	.func keccak512_ctx2hash
+keccak512_ctx2hash: ; in: r25:r24 = dest, r23:r22 = ctx
+	movw r20, r22 ; ctx becomes the third argument of keccak_ctx2hash
+	ldi r22, lo8(512) ; length_b = 512
+	ldi r23, hi8(512)
+	rjmp keccak_ctx2hash
+	.endfunc
+
+	.global keccak256_ctx2hash
+	.func keccak256_ctx2hash
+keccak256_ctx2hash: ; in: r25:r24 = dest, r23:r22 = ctx
+	movw r20, r22 ; ctx becomes the third argument of keccak_ctx2hash
+	ldi r22, lo8(256) ; length_b = 256
+	ldi r23, hi8(256) ; no jump: execution falls through into keccak_ctx2hash below
+	.endfunc
+/*
+void keccak_ctx2hash(void *dest, uint16_t length_b, keccak_ctx_t *ctx){
+	while(length_b>=ctx->r){
+		memcpy(dest, ctx->a, ctx->bs);
+		dest = (uint8_t*)dest + ctx->bs;
+		length_b -= ctx->r;
+		keccak_f1600(ctx->a);
+	}
+	memcpy(dest, ctx->a, (length_b+7)/8);
+}
+*/
+	.global keccak_ctx2hash
+	.func keccak_ctx2hash
+keccak_ctx2hash: ; in: r25:r24 = dest, r23:r22 = length_b (bits), r21:r20 = ctx
+	push_range 2, 10
+	movw r4, r20 ; r5:r4 = ctx (the state a sits at offset ctx_a = 0)
+	movw r6, r24 ; r7:r6 = running dest pointer
+	movw ZL, r20
+	movw r8, r22 ; r9:r8 = remaining length_b
+	subi ZL, lo8(-ctx_r) ; Z = ctx + ctx_r (16-bit add done as subtract of the negated offset)
+	sbci ZH, hi8(-ctx_r)
+	ld r2, Z+ ; r3:r2 = ctx->r
+	ld r3, Z+
+	ld r10, Z ; load blocksize (in bytes); Z is already at ctx_bs, which directly follows ctx_r
+10:
+	; length_b = (r9:r8) ; r = (r3:r2) ; (H:L)
+	cp r2, r8
+	cpc r3, r9
+	brsh 40f ; r >= length_b: the rest is served by the tail copy at 40
+	movw XL, r4 ; X = source (state), Z = dest
+	movw ZL, r6
+	mov r24, r10 ; byte counter = bs
+20:
+	ld r22, X+
+	st Z+, r22
+	dec r24
+	brne 20b
+	movw r6, ZL ; remember the advanced dest pointer
+	sub r8, r2 ; length_b -= r
+	sbc r9, r3
+	movw r24, r4 ; argument for keccak_f1600: pointer to the state
+	rcall keccak_f1600
+	rjmp 10b
+40:
+	movw XL, r4 ; X = source (state), Z = dest
+	movw ZL, r6
+	movw r24, r8
+	adiw r24, 7 ; r25:r24 = (length_b + 7) / 8, the number of tail bytes
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	adiw r24, 0 ; sets Z flag on the full 16-bit count
+	breq 99f ; nothing left to copy
+10:
+	ld r22, X+
+	st Z+, r22
+	sbiw r24, 1
+	brne 10b
+99:
+	pop_range 2, 10
 	ret
+	.endfunc
+
+
+	.global keccak224_init
+	.func keccak224_init
+keccak224_init: ; in: r25:r24 = ctx
+	movw XL, r24 ; X = ctx, as keccak_init_1 expects
+	ldi r24, lo8(1152) ; r = 1152
+	ldi r25, hi8(1152)
+	rjmp keccak_init_1
+	.endfunc
+
+	.global keccak384_init
+	.func keccak384_init
+keccak384_init: ; in: r25:r24 = ctx
+	movw XL, r24 ; X = ctx, as keccak_init_1 expects
+	ldi r24, lo8( 832) ; r = 832
+	ldi r25, hi8( 832)
+	rjmp keccak_init_1
+	.endfunc
+
+	.global keccak512_init
+	.func keccak512_init
+keccak512_init: ; in: r25:r24 = ctx
+	movw XL, r24 ; X = ctx, as keccak_init_1 expects
+	ldi r24, lo8( 576) ; r = 576
+	ldi r25, hi8( 576)
+	rjmp keccak_init_1
+	.endfunc
+
+	.global keccak256_init
+	.func keccak256_init
+keccak256_init: ; in: r25:r24 = ctx
+	movw r22, r24 ; ctx becomes the second argument of keccak_init
+	ldi r24, lo8(1088) ; r = 1088
+	ldi r25, hi8(1088) ; no jump: execution falls through into keccak_init below
+	.endfunc
+/*
+void keccak_init(uint16_t r, keccak_ctx_t *ctx){
+	memset(ctx->a, 0x00, 5 * 5 * 8);
+	ctx->r = r;
+	ctx->bs = (uint8_t)(r / 8);
+}
+*/
+	.global keccak_init
+	.func keccak_init
+keccak_init: ; in: r25:r24 = r, r23:r22 = ctx
+	movw XL, r22
+keccak_init_1: ; secondary entry: X = ctx, r25:r24 = r
+	ldi r22, 200 ; 5 * 5 * 8 state bytes to clear
+10:
+	st X+, __zero_reg__
+	dec r22
+	brne 10b
+	st X+, r24 ; ctx->r, low byte first
+	st X+, r25
+	lsr r25 ; r25:r24 = r / 8
+	ror r24
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	st X+, r24 ; ctx->bs = low byte of r / 8
+	ret
+	.endfunc
+
+/*
+void keccak_lastBlock(keccak_ctx_t *ctx, const void *block, uint16_t length_b){
+	uint8_t length_B;
+	uint8_t t;
+	while(length_b >= ctx->r){
+		keccak_nextBlock(ctx, block);
+		block = (uint8_t*)block + ctx->bs;
+		length_b -= ctx->r;
+	}
+	length_B = length_b / 8;
+	memxor(ctx->a, block, length_B);
+	/ * append 1 * /
+	if(length_b & 7){
+		/ * we have some single bits * /
+		t = ((uint8_t*)block)[length_B] >> (8 - (length_b & 7));
+		t |= 0x01 << (length_b & 7);
+	}else{
+		t = 0x01;
+	}
+	ctx->a[length_B] ^= t;
+	if(length_b == ctx->r - 1){
+		keccak_f1600(ctx->a);
+	}
+
+*/
+.set length_b_l, 2
+.set length_b_h, 3
+.set pbs, 10
+.set pr_l, 8
+.set pr_h, 9
+.set ctx_l, 6
+.set ctx_h, 7
+
+	.global keccak_lastBlock
+	.func keccak_lastBlock
+keccak_lastBlock: ; in: r25:r24 = ctx, r23:r22 = block, r21:r20 = length_b (bits)
+	push_range 2, 10
+	movw r2, r20 ; r3:r2 = length_b
+	movw r4, r22 ; r5:r4 = block pointer
+	movw r6, r24 ; r7:r6 = ctx
+	movw XL, r24
+	subi XL, lo8(-ctx_r) ; X = ctx + ctx_r (16-bit add done as subtract of the negated offset)
+	sbci XH, hi8(-ctx_r)
+	ld pr_l, X+ ; r9:r8 = ctx->r
+	ld pr_h, X+
+	ld pbs, X ; r10 = ctx->bs
+10:
+	cp length_b_l, pr_l
+	cpc length_b_h, pr_h
+	brlo 20f ; length_b < r: only the final partial block is left
+	movw r24, ctx_l ; keccak_nextBlock(ctx, block)
+	movw r22, r4
+	rcall keccak_nextBlock
+	add r4, pbs ; block += bs
+	adc r5, __zero_reg__
+	sub length_b_l, pr_l ; length_b -= r
+	sbc length_b_h, pr_h
+	rjmp 10b
+20:
+	movw ZL, ctx_l ; Z = state, X = block
+	movw XL, r4
+	movw r22, length_b_l
+	lsr r23 ; r22 = length_b / 8, the number of whole bytes
+	ror r22
+	lsr r23
+	ror r22
+	lsr r23
+	ror r22
+	mov r23, r22 ; mov leaves the flags alone, Z still reflects the last ror
+	breq 20f ; no whole bytes to xor in
+10:
+	ld r25, X+ ; state byte ^= block byte
+	ld r24, Z
+	eor r24, r25
+	st Z+, r24
+	dec r23
+	brne 10b
+20:
+	ldi r25, 1 ; t = 0x01, the appended bit when there are no trailing bits
+	mov r18, length_b_l
+	andi r18, 7 ; r18 = length_b & 7
+	breq 30f
+	/* we have trailing bits */
+	mov r19, r18 ; r19 = number of trailing bits
+	ld r24, X+
+	subi r18, 8 ; r18 = 8 - (length_b & 7)
+	neg r18
+10:
+	lsr r24 ; r24 = block byte >> (8 - trailing bits)
+	dec r18
+	brne 10b
+10:
+	lsl r25 ; r25 = 0x01 << trailing bits
+	dec r19
+	brne 10b
+	or r25, r24
+30:
+	ld r24, Z ; xor t into the state byte after the whole bytes
+	eor r24, r25
+	st Z, r24
+
+	movw r24, pr_l
+	sbiw r24, 1
+	cp length_b_l, r24 ; length_b == r - 1 ?
+	cpc length_b_h, r25
+	brne 20f
+	movw r24, ctx_l ; yes: run an extra permutation first
+	rcall keccak_f1600
+20:
+	movw XL, ctx_l
+	dec pbs
+	add XL, pbs ; X = address of state byte bs - 1
+	adc XH, __zero_reg__
+	ld r24, X
+	ldi r25, 0x80 ; flip the top bit of that byte
+	eor r24, r25
+	st X, r24
+	movw r24, ctx_l ; argument for the final keccak_f1600
+	pop_range 2, 10
+	rjmp keccak_f1600 ; tail call, its ret returns to our caller
+	.endfunc
+