]> git.cryptolib.org Git - avr-crypto-lib.git/blobdiff - keccak/keccak-asm.S
[keccak-asm] removing rotate64 from asm build
[avr-crypto-lib.git] / keccak / keccak-asm.S
index 77dc9ce10bc99f92f67f5b7e43ddcf0155c5532c..9d613f95042bdec0006cb4f1fb2d6ae8183fe195 100644 (file)
@@ -40,6 +40,156 @@ rho_pi_idx_table:
        .endr
 
 .align 2
+
+.global rotate64_1bit_left
+rotate64_1bit_left:     ; rotate the 64-bit value in r25:r18 (r18 = low byte) left by one bit
+       bst r25, 7       ; T = bit 63, the bit that wraps around to bit 0
+       rol r18          ; carry-in is whatever C held on entry; bit 0 is fixed by the bld below
+       rol r19          ; carry ripples upward one byte at a time
+       rol r20
+       rol r21
+       rol r22
+       rol r23
+       rol r24
+       rol r25
+       bld r18, 0       ; bit 0 = saved bit 63
+       ret
+
+.global rotate64_1bit_right
+rotate64_1bit_right:    ; rotate the 64-bit value in r25:r18 (r18 = low byte) right by one bit
+       bst r18, 0       ; T = bit 0, the bit that wraps around to bit 63
+       ror r25          ; carry-in is whatever C held on entry; bit 63 is fixed by the bld below
+       ror r24          ; carry ripples downward one byte at a time
+       ror r23
+       ror r22
+       ror r21
+       ror r20
+       ror r19
+       ror r18
+       bld r25, 7       ; bit 63 = saved bit 0
+       ret
+
+.global rotate64_nbit_autodir
+rotate64_nbit_autodir:  ; rotate r25:r18 by (r16 >> 1) bits; r16 bit 0 picks the direction (1 = right, 0 = left)
+       lsr r16          ; C = direction bit, r16 = remaining bit count
+       brcc rotate64_nbit_left ; direction bit clear: rotate left, otherwise fall through to right
+.global rotate64_nbit_right
+rotate64_nbit_right:    ; rotate r25:r18 right by r16 bits; destroys r16, r30, r31
+       ldi r30, pm_lo8(rotate64_1bit_right) ; Z = program-memory address of the 1-bit step for icall
+       ldi r31, pm_hi8(rotate64_1bit_right)
+       rjmp icall_r16_times
+.global rotate64_nbit_left
+rotate64_nbit_left:     ; rotate r25:r18 left by r16 bits; destroys r16, r30, r31
+       ldi r30, pm_lo8(rotate64_1bit_left) ; Z = program-memory address of the 1-bit step for icall
+       ldi r31, pm_hi8(rotate64_1bit_left)
+icall_r16_times:        ; call the routine at Z once per count in r16; r16 is destroyed
+1:     dec r16          ; decrement first so a count of zero performs no call
+       brmi 2f          ; counter went negative: all calls done
+       icall            ; the 1-bit steps leave r16 and Z untouched
+       rjmp 1b
+2:
+    ret
+
+rotate64_1byte_left:    ; rotate r25:r18 left by 8 bits: each byte moves one register up; clobbers r0
+       mov r0, r25      ; save the top byte, it wraps around to the bottom
+       mov r25, r24
+       mov r24, r23
+       mov r23, r22
+       mov r22, r21
+       mov r21, r20
+       mov r20, r19
+       mov r19, r18
+       mov r18, r0      ; old top byte becomes the new low byte
+       ret
+
+rotate64_2byte_left:    ; rotate r25:r18 left by 16 bits: each register pair moves one pair up; clobbers r0 and r1
+       movw r0, r24     ; save the top pair in r1:r0 (this overwrites r1)
+       movw r24, r22
+       movw r22, r20
+       movw r20, r18
+       movw r18, r0     ; old top pair becomes the new low pair
+       ret
+
+rotate64_3byte_left:    ; rotate r25:r18 left by 24 bits: each byte moves three registers up (wrapping); clobbers r0
+       mov r0, r25      ; save r25; it is overwritten first and stored last
+       mov r25, r22     ; every move fills the register freed by the previous one
+       mov r22, r19
+       mov r19, r24
+       mov r24, r21
+       mov r21, r18
+       mov r18, r23
+       mov r23, r20
+       mov r20, r0      ; old r25 lands three registers up, wrapped around
+       ret
+
+rotate64_4byte_left:    ; rotate r25:r18 by 32 bits, i.e. swap the upper and lower halves; clobbers r0 and r1
+       movw r0, r24     ; swap r25:r24 with r21:r20 via r1:r0 (this overwrites r1)
+       movw r24, r20
+       movw r20, r0
+       movw r0, r22     ; swap r23:r22 with r19:r18 via r1:r0
+       movw r22, r18
+       movw r18, r0
+       ret
+
+rotate64_5byte_left:    ; rotate r25:r18 left by 40 bits: each byte moves five registers up (wrapping); clobbers r0
+       mov r0, r25      ; save r25; it is overwritten first and stored last
+       mov r25, r20     ; every move fills the register freed by the previous one
+       mov r20, r23
+       mov r23, r18
+       mov r18, r21
+       mov r21, r24
+       mov r24, r19
+       mov r19, r22
+       mov r22, r0      ; old r25 lands five registers up, wrapped around
+       ret
+
+rotate64_6byte_left:    ; rotate r25:r18 left by 48 bits: each register pair moves one pair down; clobbers r0 and r1
+       movw r0, r18     ; save the low pair in r1:r0 (this overwrites r1)
+       movw r18, r20
+       movw r20, r22
+       movw r22, r24
+       movw r24, r0     ; old low pair becomes the new top pair
+       ret
+
+rotate64_7byte_left:    ; rotate r25:r18 left by 56 bits: each byte moves one register down; clobbers r0
+       mov r0, r18      ; save the low byte, it wraps around to the top
+       mov r18, r19
+       mov r19, r20
+       mov r20, r21
+       mov r21, r22
+       mov r22, r23
+       mov r23, r24
+       mov r24, r25
+       mov r25, r0      ; old low byte becomes the new top byte
+       ret
+
+
+byte_rot_jmp_table:     ; dispatch table entered via icall from rotate64left_code; one instruction per entry, indexed by byte count
+       ret              ; index 0: no byte rotation, return straight away
+       rjmp rotate64_1byte_left ; indices 1..7: the target's own ret returns to the icall site
+       rjmp rotate64_2byte_left
+       rjmp rotate64_3byte_left
+       rjmp rotate64_4byte_left
+       rjmp rotate64_5byte_left
+       rjmp rotate64_6byte_left
+       rjmp rotate64_7byte_left
+
+.global rotate64left_code
+rotate64left_code:      ; rotate r25:r18 as encoded in r16: bits 6..4 = bytes to rotate left, bits 3..1 = extra bit count, bit 0 = bit direction (1 = right)
+       ldi r30, pm_lo8(byte_rot_jmp_table) ; Z = program-memory address of the dispatch table
+       ldi r31, pm_hi8(byte_rot_jmp_table)
+       mov r0, r16      ; keep the full code; the andi/swap below destroys r16
+       andi r16, 0x70   ; isolate the byte count field
+       swap r16         ; move it to the low nibble: table index 0..7
+       add r30, r16     ; Z += index (one table entry per index)
+       adc r31, r1      ; assumes r1 is zero on entry (avr-gcc zero register) - TODO confirm with callers
+       mov r16, r0      ; restore the code now, the byte routines clobber r0
+       andi r16, 0x0f   ; low nibble is the argument for rotate64_nbit_autodir
+       icall            ; perform the whole-byte part of the rotation
+       clr r1           ; the 2/4/6-byte routines overwrite r1 via movw; make it zero again
+       rjmp rotate64_nbit_autodir ; tail jump: its ret returns to our caller
+
+
 /*
        void keccak_theta (uint64_t *a, uint64_t *b){
        // uint64_t b[5][5];
@@ -166,14 +316,18 @@ chi_step:
        brne 10b
        ret
 
-.global keccak_theta
-keccak_theta:
-       push_range 2, 8
+.global keccak_f1600
+keccak_f1600:
+       push_range 2, 9
        push r16
        push_range 28, 29
 
+       stack_alloc_large 200, r26, r27
+       adiw XL, 1
+
+       clr r9
+5:
        movw r30, r24 ; Z = a
-       movw r26, r22 ; X = b
 
        ldi r19, 5
 10:
@@ -305,7 +459,7 @@ keccak_theta:
        movw ZL, r2
        lpm r16, Z+
        movw r2, ZL
-       call rotate64left_code
+       rcall rotate64left_code
        movw ZL, r4
        lpm r16, Z+
        movw r4, ZL
@@ -350,6 +504,7 @@ keccak_theta:
 
        ; Z points at b
        movw XL, ZL
+       movw r4, ZL
        adiw XL, 8
        adiw ZL, 16
        movw YL, r2
@@ -366,8 +521,51 @@ keccak_theta:
        adiw ZL, 5 * 8
        dec r18
        brne 10b
+
+       /* -- iota -- */
+       ldi r30, lo8(keccak_rc_comp)
+       ldi r31, hi8(keccak_rc_comp)
+       add r30, r9
+       adc r31, __zero_reg__
+       lpm r20, Z+
+       movw YL, r2
+       ldi r21, 0x80
+       bst r20, 6
+       brtc 10f
+       ldd r22, Y+7
+       eor r22, r21
+       std Y+7, r22
+10:
+       bst r20, 5
+       brtc 10f
+       ldd r22, Y+3
+       eor r22, r21
+       std Y+3, r22
+10:
+       bst r20, 4
+       brtc 10f
+       ldd r22, Y+1
+       eor r22, r21
+       std Y+1, r22
+10:
+       andi r20, 0x8f
+       ld r22, Y
+       eor r22, r20
+       st Y, r22
+
+       inc r9
+       mov r16, r9
+       cpi r16, 24
+       breq 20f
+       movw r24, YL
+       movw r26, r4
+       rjmp 5b
+20:
+
+       stack_free_large3 200
+
        pop_range 28, 29
        pop r16
-       pop_range 2, 8
+       pop_range 2, 9
 
        ret