From 29a44972ae3749a6a273d936f2e15327ecae8a94 Mon Sep 17 00:00:00 2001 From: bg Date: Tue, 1 Jan 2013 17:01:57 +0100 Subject: [PATCH] [keccak-asm] keccak_nextBlock in asm --- keccak/keccak-asm.S | 140 +++++++++++++++++++++++++++++-------------- keccak/keccak-stub.c | 29 +-------- 2 files changed, 98 insertions(+), 71 deletions(-) diff --git a/keccak/keccak-asm.S b/keccak/keccak-asm.S index 4a13e09..3b3a488 100644 --- a/keccak/keccak-asm.S +++ b/keccak/keccak-asm.S @@ -31,7 +31,27 @@ .equ __zero_reg__, 1 -.global rho_pi_idx_table +/* +typedef struct{ + uint64_t a[5][5]; + uint16_t r, c; + uint8_t d, bs; +} keccak_ctx_t; +*/ + .struct 0 +ctx_a: + .struct ctx_a + 8 * 5 * 5 +ctx_r: + .struct ctx_r + 2 +ctx_c: + .struct ctx_c + 2 +ctx_d: + .struct ctx_d + 1 +ctx_bs: + + .section .text + + .global rho_pi_idx_table rho_pi_idx_table: .irp i, 0, 1, 2, 3, 4 .irp j, 0, 1, 2, 3, 4 @@ -39,9 +59,36 @@ rho_pi_idx_table: .endr .endr -.align 2 +/* +#define ROT_BIT(a) (( (a) <= 4) ? ((a) << 1) : (0x01 | ((8 - (a)) << 1))) +#define ROT_CODE(a) ((((a) / 8 + ((((a) % 8) > 4) ? 
1 : 0)) << 4) | ROT_BIT(((a) % 8))) + +const uint8_t keccak_rotate_codes[5][5] PROGMEM = { + { ROT_CODE( 0), ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) }, + { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) }, + { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) }, + { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) }, + { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) } +}; +*/ + +keccak_rotate_codes: +.byte 0x00, 0x02, 0x85, 0x38, 0x36 +.byte 0x48, 0x58, 0x15, 0x73, 0x28 +.byte 0x06, 0x14, 0x56, 0x32, 0x53 +.byte 0x52, 0x67, 0x23, 0x37, 0x10 +.byte 0x24, 0x04, 0x87, 0x70, 0x25 + +keccak_rc_comp: +.byte 0x01, 0x92, 0xda, 0x70 +.byte 0x9b, 0x21, 0xf1, 0x59 +.byte 0x8a, 0x88, 0x39, 0x2a +.byte 0xbb, 0xcb, 0xd9, 0x53 +.byte 0x52, 0xc0, 0x1a, 0x6a +.byte 0xf1, 0xd0, 0x21, 0x78 + + .align 2 -.global rotate64_1bit_left rotate64_1bit_left: bst r25, 7 rol r18 @@ -55,7 +102,6 @@ rotate64_1bit_left: bld r18, 0 ret -.global rotate64_1bit_right rotate64_1bit_right: bst r18, 0 ror r25 @@ -69,27 +115,6 @@ rotate64_1bit_right: bld r25, 7 ret -.global rotate64_nbit_autodir -rotate64_nbit_autodir: - lsr r16 - brcc rotate64_nbit_left -.global rotate64_nbit_right -rotate64_nbit_right: - ldi r30, pm_lo8(rotate64_1bit_right) - ldi r31, pm_hi8(rotate64_1bit_right) - rjmp icall_r16_times -.global rotate64_nbit_left -rotate64_nbit_left: - ldi r30, pm_lo8(rotate64_1bit_left) - ldi r31, pm_hi8(rotate64_1bit_left) -icall_r16_times: -1: dec r16 - brmi 2f - icall - rjmp 1b -2: - ret - rotate64_1byte_left: mov r0, r25 mov r25, r24 @@ -161,8 +186,6 @@ rotate64_7byte_left: mov r23, r24 mov r24, r25 mov r25, r0 - ret - byte_rot_jmp_table: ret @@ -174,21 +197,6 @@ byte_rot_jmp_table: rjmp rotate64_6byte_left rjmp rotate64_7byte_left -.global rotate64left_code -rotate64left_code: - ldi r30, pm_lo8(byte_rot_jmp_table) - ldi r31, pm_hi8(byte_rot_jmp_table) - mov r0, r16 - andi r16, 0x70 - swap r16 - add 
r30, r16 - adc r31, r1 - mov r16, r0 - andi r16, 0x0f - icall - clr r1 - rjmp rotate64_nbit_autodir - /* void keccak_theta (uint64_t *a, uint64_t *b){ @@ -316,7 +324,24 @@ chi_step: brne 10b ret -.global keccak_f1600 + .global keccak_nextBlock + .func keccak_nextBlock +keccak_nextBlock: /* XOR ctx->bs bytes of block (r23:r22) into the state at ctx (r25:r24), then fall through into keccak_f1600 */ + movw ZL, r24 + subi ZL, lo8(-ctx_bs) + sbci ZH, hi8(-ctx_bs) /* Z = ctx + ctx_bs: the borrow of the 16-bit add must go into ZH, not ZL again */ + ld r20, Z + movw XL, r24 + movw ZL, r22 +10: + ld r22, X + ld r23, Z+ + eor r22, r23 + st X+, r22 + dec r20 + brne 10b + + .global keccak_f1600 keccak_f1600: push_range 2, 9 push r16 @@ -459,7 +484,34 @@ keccak_f1600: movw ZL, r2 lpm r16, Z+ movw r2, ZL - rcall rotate64left_code +rotate64left_code: + ldi r30, pm_lo8(byte_rot_jmp_table) + ldi r31, pm_hi8(byte_rot_jmp_table) + mov r0, r16 + andi r16, 0x70 + swap r16 + add r30, r16 + adc r31, r1 + mov r16, r0 + andi r16, 0x0f + icall + clr r1 +rotate64_nbit_autodir: + lsr r16 + brcc rotate64_nbit_left +rotate64_nbit_right: + ldi r30, pm_lo8(rotate64_1bit_right) + ldi r31, pm_hi8(rotate64_1bit_right) + rjmp icall_r16_times +rotate64_nbit_left: + ldi r30, pm_lo8(rotate64_1bit_left) + ldi r31, pm_hi8(rotate64_1bit_left) +icall_r16_times: +1: dec r16 + brmi 2f + icall + rjmp 1b +2: movw ZL, r4 lpm r16, Z+ movw r4, ZL diff --git a/keccak/keccak-stub.c b/keccak/keccak-stub.c index 8447703..2ffbe4a 100644 --- a/keccak/keccak-stub.c +++ b/keccak/keccak-stub.c @@ -20,11 +20,8 @@ #include #include #include -#include #include "memxor.h" -#include "rotate64.h" #include "keccak.h" -#include "stdio.h" #ifdef DEBUG # undef DEBUG @@ -34,6 +31,7 @@ #if DEBUG #include "cli.h" +#include "stdio.h" void keccak_dump_state(uint64_t a[5][5]){ uint8_t i,j; @@ -62,31 +60,8 @@ void keccak_dump_ctx(keccak_ctx_t* ctx){ #endif - -const uint8_t keccak_rc_comp[] PROGMEM = { - 0x01, 0x92, 0xda, 0x70, - 0x9b, 0x21, 0xf1, 0x59, - 0x8a, 0x88, 0x39, 0x2a, - 0xbb, 0xcb, 0xd9, 0x53, - 0x52, 0xc0, 0x1a, 0x6a, - 0xf1, 0xd0, 0x21, 0x78, -}; - -const uint8_t keccak_rotate_codes[5][5] PROGMEM = { - { ROT_CODE( 0),
ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) }, - { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) }, - { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) }, - { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) }, - { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) } -}; - void keccak_f1600(uint64_t a[5][5]); -void keccak_nextBlock(keccak_ctx_t* ctx, const void* block){ - memxor(ctx->a, block, ctx->bs); - keccak_f1600(ctx->a); -} - void keccak_lastBlock(keccak_ctx_t* ctx, const void* block, uint16_t length_b){ while(length_b >= ctx->r){ keccak_nextBlock(ctx, block); @@ -94,7 +69,7 @@ void keccak_lastBlock(keccak_ctx_t* ctx, const void* block, uint16_t length_b){ length_b -= ctx->r; } memxor(ctx->a, block, (length_b)/8); - /* appand 1 */ + /* append 1 */ if(length_b & 7){ /* we have some single bits */ uint8_t t; -- 2.39.2