# Makefile for the AVR-Crypto-Lib project
-# author: Daniel Otte
+#
+# This file is part of the AVR-Crypto-Lib.
+# Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
SHELL = sh
BLOCK_CIPHERS :=
.PHONY: depclean
depclean: clean
- rm $(DEP_DIR)*.d
+ rm -f $(DEP_DIR)*.d
#-------------------------------------------------------------------------------
# dependency inclusion
acc0 = 14
acc1 = 15
+#define DEBUG 0
+
/******************************************************************************/
/*
param a: r22:r23:r24:r25
param src: r30:r31 (Z)
param len: r20
*/
-memxor_short:
+memxor_64:
; tst r20
; breq memxor_exit
ldi r20, 64
+memxor:
10: ld r21, X
ld r22, Z+
eor r21, r22
movw acc2, r24
ret
-eor_acc_from_Y_add_to_Z:
- rcall load32_from_Y
- rcall eor32_to_acc
- rjmp add_acc_to_Z
-
/******************************************************************************/
/*
param q: r28:r29 (Y)
ld acc3, X+
ret
-add_acc_to_Z:
- ld r0, Z
+add_acc_to_X: ; adds acc0..acc3 to the 4 bytes at X, lowest address first; result is stored in place and X ends up advanced by 4
+ ld r0, X ; r0 is the scratch register for each byte
add r0, acc0
- st Z+, r0
- ld r0, Z
+ st X+, r0 ; write byte 0 back and step X; the adc below consumes the carry of the add above (AVR ld/st leave SREG untouched)
+ ld r0, X ; byte 1
adc r0, acc1
- st Z+, r0
- ld r0, Z
+ st X+, r0 ; byte 1 stored, carry still pending for byte 2
+ ld r0, X ; byte 2
adc r0, acc2
- st Z+, r0
- ld r0, Z
+ st X+, r0 ; byte 2 stored, carry still pending for byte 3
+ ld r0, X ; byte 3 (highest address)
adc r0, acc3
- st Z+, r0
+ st X+, r0 ; final carry out is not stored anywhere; X now points just past the 4 updated bytes
ret
load_rotate_add_M:
*/
f2_1_shift_table:
- .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
+; .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55 ; previous byte order, kept for reference only
+ .byte 0x55, 0x87, 0x55, 0x51, 0x03, 0x66, 0x64, 0x2B ; same 8 bytes reversed, so f2 can walk the table front-to-back with `lpm r20, Z+`; f2 masks the low nibble (andi 0x0f) for its first shift and swaps in the high nibble for the second
f2_2_shift_table:
- .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
-
+; .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1) ; previous 7-entry table, kept for reference only
+ .byte (8<<1)+1, (6<<1), (6<<1)+1, (4<<1)+1, (3<<1), (4<<1), (7<<1), (2<<1) ; entry = (count<<1)|dir, read via `lpm r20, Z+`; f2 does `lsr r20` and calls shiftleft32 if the dropped bit was 1, shiftright32 if 0. Old entries reversed, plus a new leading left-by-8 entry (presumably replacing the removed byte-wise `eor acc1, xl0` special case - TODO confirm)
expand2_rot_table:
.byte 3,7,13,16,19,23,27
-; .byte 0 ; just for alignment
+ .byte 0 ; padding only: rounds the 7 entries above up to an even byte count (alignment); not a table entry
expand2:
rcall expand_intro
movw m0, r30
/* xor m into h */
; ldi r20, 64
- rcall memxor_short
+ rcall memxor_64
movw r30, m0
movw r26, h0
; ldi r20, 64
movw r26, h0
movw r30, m0
- rcall memxor_short
+ rcall memxor_64
sbiw r26, 60
;---
clr r17
h1 = 19
f2:
movw r26, r24
- /* calc XL */
+ /* calc XL & XH */
adiw r26, 63
adiw r26, 1
movw q16_0, r26
movw h0, r20
+;---
+; push h0
+; push h1
+;---
movw r28, r22
- rcall load32_from_X
- rcall mov32_to_acc
+ rcall load_acc_from_X
ldi r17, 15
10: rcall load32_from_X
rcall eor32_to_acc
; rcall print32
; pop_range 22, 25
;--- END DBG
-
+ /* copy m(Y) into h */
+ movw r26, h0
+ ldi r22, 64
+10:
+ ld r23, Y+
+ st X+, r23
+ dec r22
+ brne 10b
;--- /* calc first half of h0..h15 */
- movw r26, q16_0
+ movw r28, q16_0
+ movw r26, h0
+ ldi r30, lo8(f2_1_shift_table)
+ ldi r31, hi8(f2_1_shift_table)
ldi r17, 16
10:
- rcall load32_from_Y
- rcall mov32_to_acc
;---
movw r22, xh0
movw r24, xh2
brge 15f
clr r1
rjmp 26f
-15: ldi r30, lo8(f2_1_shift_table-9)
- ldi r31, hi8(f2_1_shift_table-9)
- add r30, r17
- adc r31, r1
- lpm r20, Z
+15: lpm r20, Z+
mov r1, r20
andi r20, 0x0f
clt
rcall shiftright32
rjmp 26f
25: rcall shiftleft32
-26: rcall eor32_to_acc
+26: rcall mov32_to_acc
;---
- rcall load32_from_X
+ rcall load32_from_Y
mov r20, r1
clr r1
swap r20
27: rcall shiftright32
28: rcall eor32_to_acc
;---
- movw r30, h0
- st Z+, acc0
- st Z+, acc1
- st Z+, acc2
- st Z+, acc3
- movw h0, r30
+ ld r0, X
+ eor r0, acc0
+ st X+, r0
+ ld r0, X
+ eor r0, acc1
+ st X+, r0
+ ld r0, X
+ eor r0, acc2
+ st X+, r0
+ ld r0, X
+ eor r0, acc3
+ st X+, r0
;---
dec r17
brne 10b
;-----
- sbiw r26, 4*8 /* X points to q[24] */
- movw r28, r26
+ sbiw r28, 4*8 /* Y points to q[24] */
+ movw r30, r28
sbiw r28, 63
sbiw r28, 33 /* Y points to q[0] */
- sbiw r30, 63
- sbiw r30, 1 /* Z points to h0 */
- ldi r17, 8
-10: movw acc0, xl0
- movw acc2, xl2
- rcall load32_from_X
- rcall eor32_to_acc
- rcall eor_acc_from_Y_add_to_Z
- dec r17
- brne 10b
- sbiw r26, 9*4 /* X points to q[23] */
- rcall load_acc_from_X
- eor acc1, xl0
- eor acc2, xl1
- eor acc3, xl2
- rcall eor_acc_from_Y_add_to_Z
-;---
- sbiw r26, 8*4 /* X points to q[16] */
- mov h0, r30
- ldi r17, 7
-10:
- ldi r30, lo8(f2_2_shift_table-1)
- ldi r31, hi8(f2_2_shift_table-1)
- add r30, r17
- adc r31, r1
- lpm r20, Z
- rcall load_acc_from_X
- movw r22, xl0
+ movw r26, r28
+ ldi r20, 8*4
+ /* xor q[24..31] into q[0..7] */
+ rcall memxor
+ /* xor q[23] into q[8] */
+ sbiw r30, 9*4
+ ldi r20, 4
+ rcall memxor
+ /* xor q[16..22] into q[9..15] */
+ sbiw r30, 8*4
+ ldi r20, 7*4
+ rcall memxor
+
+ movw r26, h0
+ ldi r17, 15
+ ldi r30, lo8(f2_2_shift_table)
+ ldi r31, hi8(f2_2_shift_table)
+10: movw r22, xl0
movw r24, xl2
+ sbrc r17, 3
+ rjmp 20f
+ lpm r20, Z+
lsr r20
- brcc 20f
+ brcs 15f
+ rcall shiftright32
+ rjmp 20f
+15:
rcall shiftleft32
- rjmp 21f
-20: rcall shiftright32
-21:
- movw r30, h0
+20:
+ rcall mov32_to_acc
+ rcall load32_from_Y
rcall eor32_to_acc
- rcall eor_acc_from_Y_add_to_Z
- movw h0, r30
+ rcall add_acc_to_X
dec r17
- brne 10b
+ brpl 10b
;-----
- sbiw r30, 8*4 /* Z points to h8 */
- movw r26, r30
- sbiw r26, 4*4 /* X points to h4 */
+ sbiw r26, 8*4 /* X points to h8 */
+ movw r28, r26
+ sbiw r28, 4*4 /* Y points to h4 */
ldi r17, 8
ldi r18, 9
10:
- rcall load32_from_X
+ rcall load32_from_Y
mov r20, r18
rcall rotateleft32
rcall mov32_to_acc
- rcall add_acc_to_Z
+ rcall add_acc_to_X
inc r18
cpi r17, 5
brne 20f
- sbiw r26, 8*4
+ sbiw r28, 8*4
20: dec r17
brne 10b
+exit:
;--- DBG
; pop r25
; pop r24