X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=aes_enc-asm.S;h=14514f69d83309b7f82a1068dffaa484d4123347;hb=17332291e15183d71d88ed868275e3cb53917180;hp=ed04b6f6ff97a03b248f262e53098db101f36c61;hpb=5e274071cfce142ba67387bea5ad017b42b8001c;p=avr-crypto-lib.git diff --git a/aes_enc-asm.S b/aes_enc-asm.S index ed04b6f..14514f6 100644 --- a/aes_enc-asm.S +++ b/aes_enc-asm.S @@ -1,6 +1,6 @@ /* aes_enc-asm.S */ /* - This file is part of the Crypto-avr-lib/microcrypt-lib. + This file is part of the This file is part of the AVR-Crypto-Lib. Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de) This program is free software: you can redistribute it and/or modify @@ -38,25 +38,6 @@ B = 29 P = 0 xREDUCER = 25 -gf256mul: - clr P -1: - lsr A - breq 4f - brcc 2f - eor P, B -2: - lsl B - brcc 3f - eor B, xREDUCER -3: - rjmp 1b -4: - brcc 2f - eor P, B -2: - ret - .global aes256_enc aes256_enc: ldi r20, 14 @@ -118,6 +99,10 @@ aes_encrypt_core: .irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33 ld \param, Z+ .endr + + ldi xREDUCER, 0x1b /* load reducer */ + ldi r31, hi8(aes_sbox) + /* key whitening */ 1: .irp param,ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33 @@ -126,323 +111,227 @@ aes_encrypt_core: .endr brtc 2f - rjmp exit +exit: + pop r31 + pop r30 + st Z+, ST00 + st Z+, ST01 + st Z+, ST02 + st Z+, ST03 + st Z+, ST10 + st Z+, ST11 + st Z+, ST12 + st Z+, ST13 + st Z+, ST20 + st Z+, ST21 + st Z+, ST22 + st Z+, ST23 + st Z+, ST30 + st Z+, ST31 + st Z+, ST32 + st Z+, ST33 + pop r29 + pop r28 + pop_range 2, 17 + ret + 2: dec CTR brne 3f set 3: - ldi r30, lo8(aes_sbox) - ldi r31, hi8(aes_sbox) - movw SBOX_SAVE0, r30 /* encryption loop */ /* SBOX substitution and shifting */ - movw r30, SBOX_SAVE0 - add r30, ST00 - adc r31, r1 + mov r30, ST00 lpm ST00, Z - movw r30, SBOX_SAVE0 - add r30, ST10 - adc r31, r1 + mov r30, ST10 lpm ST10, Z - movw r30, SBOX_SAVE0 - add r30, ST20 - adc r31, r1 + mov r30, ST20 lpm ST20, Z - movw r30, SBOX_SAVE0 - add r30, ST30 - adc r31, r1 + mov r30, ST30 lpm ST30, Z - movw r30, SBOX_SAVE0 - add r30, ST01 - adc r31, r1 + mov r30, ST01 lpm T0, Z - movw r30, SBOX_SAVE0 - add r30, ST11 - adc r31, r1 + mov r30, ST11 lpm ST01, Z - movw r30, SBOX_SAVE0 - add r30, ST21 - adc r31, r1 + mov r30, ST21 lpm ST11, Z - movw r30, SBOX_SAVE0 - add r30, ST31 - adc r31, r1 + mov r30, ST31 lpm ST21, Z mov ST31, T0 - movw r30, SBOX_SAVE0 - add r30, ST02 - adc r31, r1 + mov r30, ST02 lpm T0, Z - movw r30, SBOX_SAVE0 - add r30, ST12 - adc r31, r1 + mov r30, ST12 lpm T1, Z - movw r30, SBOX_SAVE0 - add r30, ST22 - adc r31, r1 + mov r30, ST22 lpm ST02, Z - movw r30, SBOX_SAVE0 - add r30, ST32 - adc r31, r1 + mov r30, ST32 lpm ST12, Z mov ST22, T0 mov ST32, T1 - movw r30, SBOX_SAVE0 - add r30, ST03 - adc r31, r1 + mov r30, ST03 lpm T0, Z - movw r30, SBOX_SAVE0 - add r30, ST13 - adc r31, r1 - lpm T1, Z - movw r30, SBOX_SAVE0 - add r30, ST23 - adc r31, r1 - lpm T2, Z - movw r30, SBOX_SAVE0 - add r30, ST33 - adc r31, r1 + mov r30, ST33 lpm ST03, Z + mov r30, ST23 + lpm ST33, Z + mov r30, ST13 + lpm ST23, Z mov ST13, T0 - mov ST23, T1 - mov ST33, T2 - + /* mixcols (or rows in our case) */ brtc 2f rjmp 1b 2: - ldi xREDUCER, 0x1b /* load reducer */ + /* mixrow 1 */ + mov r0, ST02 + eor r0, ST03 + mov T2, r0 + + mov T0, ST00 + eor ST00, ST01 + eor r0, ST00 + lsl ST00 + brcc 3f + eor ST00, xREDUCER +3: eor ST00, r0 + eor ST00, T0 - ldi A, 2 - mov B, ST00 - rcall gf256mul - mov T0, r0 - ldi A, 3 - mov B, 
ST01 - rcall gf256mul - eor T0, r0 - eor T0, ST02 - eor T0, ST03 + mov T1, ST01 + eor T1, ST02 + lsl T1 + brcc 3f + eor T1, xREDUCER +3: eor T1, r0 + eor ST01, T1 - mov T1, ST00 - ldi A, 2 - mov B, ST01 - rcall gf256mul - eor T1, r0 - ldi A, 3 - mov B, ST02 - rcall gf256mul - eor T1, r0 - eor T1, ST03 + lsl T2 + brcc 3f + eor T2, xREDUCER +3: eor T2, r0 + eor ST02, T2 + + eor T0, ST03 + lsl T0 + brcc 3f + eor T0, xREDUCER +3: eor T0, r0 + eor ST03, T0 + + /* mixrow 2 */ + mov r0, ST12 + eor r0, ST13 + mov T2, r0 + + mov T0, ST10 + eor ST10, ST11 + eor r0, ST10 + lsl ST10 + brcc 3f + eor ST10, xREDUCER +3: eor ST10, r0 + eor ST10, T0 - mov T2, ST00 - eor T2, ST01 - ldi A, 2 - mov B, ST02 - rcall gf256mul - eor T2, r0 - ldi A, 3 - mov B, ST03 - rcall gf256mul - eor T2, r0 + mov T1, ST11 + eor T1, ST12 + lsl T1 + brcc 3f + eor T1, xREDUCER +3: eor T1, r0 + eor ST11, T1 - ldi A, 3 - mov B, ST00 - rcall gf256mul - mov T3, r0 - eor T3, ST01 - eor T3, ST02 - ldi A, 2 - mov B, ST03 - rcall gf256mul - eor T3, r0 + lsl T2 + brcc 3f + eor T2, xREDUCER +3: eor T2, r0 + eor ST12, T2 - mov ST00, T0 - mov ST01, T1 - mov ST02, T2 - mov ST03, T3 - - ldi A, 2 - mov B, ST10 - rcall gf256mul - mov T0, r0 - ldi A, 3 - mov B, ST11 - rcall gf256mul - eor T0, r0 - eor T0, ST12 eor T0, ST13 + lsl T0 + brcc 3f + eor T0, xREDUCER +3: eor T0, r0 + eor ST13, T0 + + /* mixrow 3 */ + mov r0, ST22 + eor r0, ST23 + mov T2, r0 + + mov T0, ST20 + eor ST20, ST21 + eor r0, ST20 + lsl ST20 + brcc 3f + eor ST20, xREDUCER +3: eor ST20, r0 + eor ST20, T0 - mov T1, ST10 - ldi A, 2 - mov B, ST11 - rcall gf256mul - eor T1, r0 - ldi A, 3 - mov B, ST12 - rcall gf256mul - eor T1, r0 - eor T1, ST13 - - mov T2, ST10 - eor T2, ST11 - ldi A, 2 - mov B, ST12 - rcall gf256mul - eor T2, r0 - ldi A, 3 - mov B, ST13 - rcall gf256mul - eor T2, r0 + mov T1, ST21 + eor T1, ST22 + lsl T1 + brcc 3f + eor T1, xREDUCER +3: eor T1, r0 + eor ST21, T1 - ldi A, 3 - mov B, ST10 - rcall gf256mul - mov T3, r0 - eor T3, ST11 - eor T3, ST12 - ldi A, 2 - mov B, ST13 - rcall gf256mul - eor T3, r0 + lsl T2 + brcc 3f + eor T2, xREDUCER +3: eor T2, r0 + eor ST22, T2 - mov ST10, T0 - mov ST11, T1 - mov ST12, T2 - mov ST13, T3 - - ldi A, 2 - mov B, ST20 - rcall gf256mul - mov T0, r0 - ldi A, 3 - mov B, ST21 - rcall gf256mul - eor T0, r0 - eor T0, ST22 eor T0, ST23 + lsl T0 + brcc 3f + eor T0, xREDUCER +3: eor T0, r0 + eor ST23, T0 + + /* mixrow 4 */ + mov r0, ST32 + eor r0, ST33 + mov T2, r0 + + mov T0, ST30 + eor ST30, ST31 + eor r0, ST30 + lsl ST30 + brcc 3f + eor ST30, xREDUCER +3: eor ST30, r0 + eor ST30, T0 - mov T1, ST20 - ldi A, 2 - mov B, ST21 - rcall gf256mul - eor T1, r0 - ldi A, 3 - mov B, ST22 - rcall gf256mul - eor T1, r0 - eor T1, ST23 - - mov T2, ST20 - eor T2, ST21 - ldi A, 2 - mov B, ST22 - rcall gf256mul - eor T2, r0 - ldi A, 3 - mov B, ST23 - rcall gf256mul - eor T2, r0 + mov T1, ST31 + eor T1, ST32 + lsl T1 + brcc 3f + eor T1, xREDUCER +3: eor T1, r0 + eor ST31, T1 - ldi A, 3 - mov B, ST20 - rcall gf256mul - mov T3, r0 - eor T3, ST21 - eor T3, ST22 - ldi A, 2 - mov B, ST23 - rcall gf256mul - eor T3, r0 + lsl T2 + brcc 3f + eor T2, xREDUCER +3: eor T2, r0 + eor ST32, T2 - mov ST20, T0 - mov ST21, T1 - mov ST22, T2 - mov ST23, T3 - - ldi A, 2 - mov B, ST30 - rcall gf256mul - mov T0, r0 - ldi A, 3 - mov B, ST31 - rcall gf256mul - eor T0, r0 - eor T0, ST32 eor T0, ST33 - - mov T1, ST30 - ldi A, 2 - mov B, ST31 - rcall gf256mul - eor T1, r0 - ldi A, 3 - mov B, ST32 - rcall gf256mul - eor T1, r0 - eor T1, ST33 - - mov T2, ST30 - eor T2, ST31 - ldi A, 2 
- mov B, ST32 - rcall gf256mul - eor T2, r0 - ldi A, 3 - mov B, ST33 - rcall gf256mul - eor T2, r0 - - ldi A, 3 - mov B, ST30 - rcall gf256mul - mov T3, r0 - eor T3, ST31 - eor T3, ST32 - ldi A, 2 - mov B, ST33 - rcall gf256mul - eor T3, r0 - - mov ST30, T0 - mov ST31, T1 - mov ST32, T2 - mov ST33, T3 - + lsl T0 + brcc 3f + eor T0, xREDUCER +3: eor T0, r0 + eor ST33, T0 /* mix colums (rows) done */ /* add key*/ rjmp 1b -exit: - pop r31 - pop r30 - st Z+, ST00 - st Z+, ST01 - st Z+, ST02 - st Z+, ST03 - st Z+, ST10 - st Z+, ST11 - st Z+, ST12 - st Z+, ST13 - st Z+, ST20 - st Z+, ST21 - st Z+, ST22 - st Z+, ST23 - st Z+, ST30 - st Z+, ST31 - st Z+, ST32 - st Z+, ST33 - - pop r29 - pop r28 - pop_range 2, 17 - ret +
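
Note on the change above: the hunks drop the gf256mul subroutine (and every "ldi A, 2/3; rcall gf256mul" sequence) and inline the MixColumns arithmetic instead. Each multiplication by 2 in GF(2^8) becomes an lsl followed by a conditional eor with the reducer 0x1b, which is loaded once into xREDUCER; the multiplications by 3 are folded away by computing each output byte as b[i] = a[i] ^ t ^ xtime(a[i] ^ a[i+1]), where t is the XOR of all four bytes of the row (the state is held transposed, so MixColumns operates on rows here, as the "/* mixcols (or rows in our case) */" comment notes). The S-box lookup is also shortened: r31 is loaded once with hi8(aes_sbox) and the state byte is moved straight into r30, which only works if aes_sbox starts on a 256-byte boundary.

The following C sketch is only a reference model of that MixColumns step, not code from the library; the names xtime and mix_row are illustrative.

    #include <stdint.h>

    /* multiply by 2 in GF(2^8), reducing with the AES polynomial 0x1b
       (the same conditional eor with xREDUCER done in the asm) */
    static uint8_t xtime(uint8_t a)
    {
        return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0x00));
    }

    /* one MixColumns step on a 4-byte row (state stored transposed):
       b[i] = a[i] ^ t ^ xtime(a[i] ^ a[i+1]), t = a[0]^a[1]^a[2]^a[3] */
    static void mix_row(uint8_t a[4])
    {
        uint8_t t  = a[0] ^ a[1] ^ a[2] ^ a[3];
        uint8_t a0 = a[0];               /* keep original a[0] for the last line */
        a[0] ^= t ^ xtime(a[0] ^ a[1]);
        a[1] ^= t ^ xtime(a[1] ^ a[2]);
        a[2] ^= t ^ xtime(a[2] ^ a[3]);
        a[3] ^= t ^ xtime(a[3] ^ a0);
    }

Expanding the identity 3*x = xtime(x) ^ x shows this matches the textbook column mix (2,3,1,1), while needing only one shift-and-reduce per output byte, which is what makes the inlined version shorter and faster than the removed gf256mul loop.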