/* aes_dec-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008, 2009  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/.
*/
/**
 * \file     aes_dec-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2009-01-10
 * \license  GPLv3 or later
 *
 */

#include "avr-asm-macros.S"

/*
 * Register aliases.
 *
 * Every alias is a plain register number, so "mov T0, ST03" assembles as
 * "mov 2, 11".
 *
 * A, B and T5 are not referenced anywhere in this file; they are kept so
 * that nothing which might rely on them stops assembling.
 */
A        = 28
B        = 29
P        = 0            /* scratch for one table lookup / round-key byte (r0) */
xREDUCER = 25           /* holds 0x1b, xor-ed in after a doubling overflows   */

T0 = 2
T1 = 3
T2 = 4
T3 = 5
T4 = 6
T5 = 7

/*
 * The 16 state bytes live in r8..r23.  STcr is the byte loaded from
 * state[4*c + r]: the first digit selects the group of four consecutive
 * bytes, the second digit the position inside that group.
 */
ST00 =  8
ST01 =  9
ST02 = 10
ST03 = 11
ST10 = 12
ST11 = 13
ST12 = 14
ST13 = 15
ST20 = 16
ST21 = 17
ST22 = 18
ST23 = 19
ST30 = 20
ST31 = 21
ST32 = 22
ST33 = 23

CTR  = 24               /* rounds still to run */

/*
 * aes_dec_xtime reg
 *
 * Doubles reg in GF(2^8): shift left by one and, when a bit was shifted
 * out, xor in xREDUCER (0x1b).
 *
 * Clobbers: SREG flags.
 */
.macro aes_dec_xtime reg
	lsl  \reg
	brcc 3f
	eor  \reg, xREDUCER
3:
.endm

/*
 * aes_dec_inv_mixcol s0, s1, s2, s3
 *
 * Inverse MixColumns on the four state bytes s0..s3 (in place).
 *
 * With  t  = s3 ^ s2
 *       u  = s0 ^ s1
 *       vp = 9 * (t ^ u)               (lut_gf256mul_0x09)
 *       w  = 4 * (s2 ^ s0) ^ vp        (lut_gf256mul_0x04)
 *       v  = 4 * (s3 ^ s1) ^ vp        (lut_gf256mul_0x04)
 * the macro computes
 *       s3 ^= 2 * (s0 ^ s3) ^ v
 *       s1 ^= 2 * (s2 ^ s1) ^ v
 *       s2 ^= 2 * t ^ w
 *       s0 ^= 2 * u ^ w
 * Expanding the products gives, e.g. for s0,
 *       s0 = 0E*s0 ^ 0B*s1 ^ 0D*s2 ^ 09*s3
 * and the rotated coefficient rows for s1, s2 and s3.
 *
 * Only the low byte of Z (r30) is replaced by the table index, so both
 * lookup tables have to start on a 256-byte boundary.
 *
 * Clobbers: T0, T1, T2, T3, T4, P (r0), r30, r31, SREG flags.
 */
.macro aes_dec_inv_mixcol s0, s1, s2, s3
	/* preparing */
	ldi  r31, hi8(lut_gf256mul_0x09)
	mov  T0, \s3
	eor  T0, \s2                    /* T0 = t  */
	mov  T1, \s0
	eor  T1, \s1                    /* T1 = u  */
	mov  r30, T0
	eor  r30, T1
	lpm  T2, Z                      /* T2 = vp */

	ldi  r31, hi8(lut_gf256mul_0x04)
	mov  r30, \s2
	eor  r30, \s0
	lpm  T3, Z
	eor  T3, T2                     /* T3 = w  */

	mov  r30, \s3
	eor  r30, \s1
	lpm  P, Z
	eor  T2, P                      /* T2 = v  */

	/* now the big move */
	mov  T4, \s0
	eor  T4, \s3
	aes_dec_xtime T4
	eor  T4, T2
	eor  \s3, T4

	mov  T4, \s2
	eor  T4, \s1
	aes_dec_xtime T4
	eor  T4, T2
	eor  \s1, T4

	aes_dec_xtime T0
	eor  T0, T3
	eor  \s2, T0

	aes_dec_xtime T1
	eor  T1, T3
	eor  \s0, T1
.endm

/*
 * Entry points with a fixed round count.
 *
 * Each one loads the number of rounds into r20 and continues in
 * aes_decrypt_core; r24:r25 and r22:r23 are passed through untouched.
 * aes128_dec does not jump: it falls through into aes_decrypt_core, so
 * no instruction may be placed between the two labels.
 */
.global aes256_dec
aes256_dec:
	ldi  r20, 14
	rjmp aes_decrypt_core

.global aes192_dec
aes192_dec:
	ldi  r20, 12
	rjmp aes_decrypt_core

.global aes128_dec
aes128_dec:
	ldi  r20, 10
	/* fall through into aes_decrypt_core */

/*
 * void aes_decrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
 *
 * Decrypts the 16 bytes at *state in place.
 *
 * param state:  r24:r25
 * param ks:     r22:r23
 * param rounds: r20
 *
 * Round keys are read backwards, 16 bytes at a time, starting just below
 * ks + (rounds + 1) * 16 and ending at ks.
 *
 * Flow:
 *   state ^= last round key
 *   repeat rounds times:
 *       inverse SubBytes + inverse ShiftRows
 *       state ^= next (lower) round key
 *       inverse MixColumns            (skipped after the final key addition)
 *
 * The SREG T flag marks the final round: it is cleared on entry and set
 * once CTR reaches zero.
 *
 * Preconditions visible in the code below:
 *   - r1 must contain zero (it is used as the carry-only operand of adc).
 *   - rounds + 1 must not exceed 15, because the multiplication by 16 is
 *     done with a nibble swap.
 *   - aes_invsbox (defined outside this file) must start on a 256-byte
 *     boundary, since only hi8() of its address is loaded into Z.
 *
 * r2..r17 and r28/r29 are saved on entry and restored before returning.
 * Nothing in this file writes r28/r29; NOTE(review): the save looks
 * redundant, confirm against avr-asm-macros.S before removing it.
 * Changed on return: r0, r18..r27, r30, r31, SREG (including T).
 */
.global aes_decrypt_core
aes_decrypt_core:
	push_range 2, 17
	push r28
	push r29
	push r24                        /* keep the state pointer for the */
	push r25                        /* write-back at exit             */
	movw r26, r22                   /* X = ks                         */
	movw r30, r24                   /* Z = state                      */
	mov  CTR, r20
	inc  r20
	swap r20                        /* r20 = (rounds + 1) * 16        */
	add  r26, r20
	adc  r27, r1                    /* X = one past the last round key */
	clt                             /* T = 0: not the final round     */

	/* load the state */
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		ld \param, Z+
	.endr

	ldi  xREDUCER, 0x1b             /* load reducer */

	/* key addition (X walks backwards, so the byte order is reversed) */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld  r0, -X
		eor \param, r0
	.endr

.Laes_dec_round:
	dec  CTR
	brne .Laes_dec_sub_shift
	set                             /* T = 1: this is the final round */
.Laes_dec_sub_shift:
	ldi  r31, hi8(aes_invsbox)

	/* substitute and invShift */
	/* bytes STx0: substituted, not moved */
	.irp param, ST00, ST10, ST20, ST30
		mov r30, \param
		lpm \param, Z
	.endr

	/* bytes STx1: ST01 -> ST11 -> ST21 -> ST31 -> ST01 */
	mov  r30, ST31
	lpm  T0, Z
	mov  r30, ST21
	lpm  ST31, Z
	mov  r30, ST11
	lpm  ST21, Z
	mov  r30, ST01
	lpm  ST11, Z
	mov  ST01, T0

	/* bytes STx2: ST02 <-> ST22 and ST12 <-> ST32 */
	mov  r30, ST32
	lpm  T0, Z
	mov  r30, ST22
	lpm  T1, Z
	mov  r30, ST12
	lpm  ST32, Z
	mov  r30, ST02
	lpm  ST22, Z
	mov  ST12, T0
	mov  ST02, T1

	/* bytes STx3: ST33 -> ST23 -> ST13 -> ST03 -> ST33 */
	mov  r30, ST03
	lpm  T0, Z
	mov  r30, ST13
	lpm  ST03, Z
	mov  r30, ST23
	lpm  ST13, Z
	mov  r30, ST33
	lpm  ST23, Z
	mov  ST33, T0

	/* key addition */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld  r0, -X
		eor \param, r0
	.endr

	brtc .Laes_dec_inv_mixcol       /* more rounds to go */

exit:
	pop  r31                        /* Z = state (pushed as r24, r25) */
	pop  r30
	/* write the state back */
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		st Z+, \param
	.endr
	pop  r29
	pop  r28
	pop_range 2, 17
	ret

.Laes_dec_inv_mixcol:
	/* inv column (row) mixing, one group of four state bytes at a time */
	aes_dec_inv_mixcol ST00, ST01, ST02, ST03       /* invMixCol (Row) 1 */
	aes_dec_inv_mixcol ST10, ST11, ST12, ST13       /* invMixCol (Row) 2 */
	aes_dec_inv_mixcol ST20, ST21, ST22, ST23       /* invMixCol (Row) 3 */
	aes_dec_inv_mixcol ST30, ST31, ST32, ST33       /* invMixCol (Row) 4 */
	rjmp .Laes_dec_round

/*
 * Multiplication tables, read with lpm.
 *
 * lut_gf256mul_0x09[x] = 9 * x and lut_gf256mul_0x04[x] = 4 * x, both in
 * GF(2^8) with the reduction constant 0x1b.
 *
 * The first table is aligned to 256 bytes and is exactly 256 bytes long,
 * which puts the second table on a 256-byte boundary as well.  Do not
 * insert anything between the two tables.
 */
.balign 256

lut_gf256mul_0x09:
	.byte 0x00, 0x09, 0x12, 0x1B, 0x24, 0x2D, 0x36, 0x3F
	.byte 0x48, 0x41, 0x5A, 0x53, 0x6C, 0x65, 0x7E, 0x77
	.byte 0x90, 0x99, 0x82, 0x8B, 0xB4, 0xBD, 0xA6, 0xAF
	.byte 0xD8, 0xD1, 0xCA, 0xC3, 0xFC, 0xF5, 0xEE, 0xE7
	.byte 0x3B, 0x32, 0x29, 0x20, 0x1F, 0x16, 0x0D, 0x04
	.byte 0x73, 0x7A, 0x61, 0x68, 0x57, 0x5E, 0x45, 0x4C
	.byte 0xAB, 0xA2, 0xB9, 0xB0, 0x8F, 0x86, 0x9D, 0x94
	.byte 0xE3, 0xEA, 0xF1, 0xF8, 0xC7, 0xCE, 0xD5, 0xDC
	.byte 0x76, 0x7F, 0x64, 0x6D, 0x52, 0x5B, 0x40, 0x49
	.byte 0x3E, 0x37, 0x2C, 0x25, 0x1A, 0x13, 0x08, 0x01
	.byte 0xE6, 0xEF, 0xF4, 0xFD, 0xC2, 0xCB, 0xD0, 0xD9
	.byte 0xAE, 0xA7, 0xBC, 0xB5, 0x8A, 0x83, 0x98, 0x91
	.byte 0x4D, 0x44, 0x5F, 0x56, 0x69, 0x60, 0x7B, 0x72
	.byte 0x05, 0x0C, 0x17, 0x1E, 0x21, 0x28, 0x33, 0x3A
	.byte 0xDD, 0xD4, 0xCF, 0xC6, 0xF9, 0xF0, 0xEB, 0xE2
	.byte 0x95, 0x9C, 0x87, 0x8E, 0xB1, 0xB8, 0xA3, 0xAA
	.byte 0xEC, 0xE5, 0xFE, 0xF7, 0xC8, 0xC1, 0xDA, 0xD3
	.byte 0xA4, 0xAD, 0xB6, 0xBF, 0x80, 0x89, 0x92, 0x9B
	.byte 0x7C, 0x75, 0x6E, 0x67, 0x58, 0x51, 0x4A, 0x43
	.byte 0x34, 0x3D, 0x26, 0x2F, 0x10, 0x19, 0x02, 0x0B
	.byte 0xD7, 0xDE, 0xC5, 0xCC, 0xF3, 0xFA, 0xE1, 0xE8
	.byte 0x9F, 0x96, 0x8D, 0x84, 0xBB, 0xB2, 0xA9, 0xA0
	.byte 0x47, 0x4E, 0x55, 0x5C, 0x63, 0x6A, 0x71, 0x78
	.byte 0x0F, 0x06, 0x1D, 0x14, 0x2B, 0x22, 0x39, 0x30
	.byte 0x9A, 0x93, 0x88, 0x81, 0xBE, 0xB7, 0xAC, 0xA5
	.byte 0xD2, 0xDB, 0xC0, 0xC9, 0xF6, 0xFF, 0xE4, 0xED
	.byte 0x0A, 0x03, 0x18, 0x11, 0x2E, 0x27, 0x3C, 0x35
	.byte 0x42, 0x4B, 0x50, 0x59, 0x66, 0x6F, 0x74, 0x7D
	.byte 0xA1, 0xA8, 0xB3, 0xBA, 0x85, 0x8C, 0x97, 0x9E
	.byte 0xE9, 0xE0, 0xFB, 0xF2, 0xCD, 0xC4, 0xDF, 0xD6
	.byte 0x31, 0x38, 0x23, 0x2A, 0x15, 0x1C, 0x07, 0x0E
	.byte 0x79, 0x70, 0x6B, 0x62, 0x5D, 0x54, 0x4F, 0x46

lut_gf256mul_0x04:
	.byte 0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C
	.byte 0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C
	.byte 0x40, 0x44, 0x48, 0x4C, 0x50, 0x54, 0x58, 0x5C
	.byte 0x60, 0x64, 0x68, 0x6C, 0x70, 0x74, 0x78, 0x7C
	.byte 0x80, 0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9C
	.byte 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC
	.byte 0xC0, 0xC4, 0xC8, 0xCC, 0xD0, 0xD4, 0xD8, 0xDC
	.byte 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC
	.byte 0x1B, 0x1F, 0x13, 0x17, 0x0B, 0x0F, 0x03, 0x07
	.byte 0x3B, 0x3F, 0x33, 0x37, 0x2B, 0x2F, 0x23, 0x27
	.byte 0x5B, 0x5F, 0x53, 0x57, 0x4B, 0x4F, 0x43, 0x47
	.byte 0x7B, 0x7F, 0x73, 0x77, 0x6B, 0x6F, 0x63, 0x67
	.byte 0x9B, 0x9F, 0x93, 0x97, 0x8B, 0x8F, 0x83, 0x87
	.byte 0xBB, 0xBF, 0xB3, 0xB7, 0xAB, 0xAF, 0xA3, 0xA7
	.byte 0xDB, 0xDF, 0xD3, 0xD7, 0xCB, 0xCF, 0xC3, 0xC7
	.byte 0xFB, 0xFF, 0xF3, 0xF7, 0xEB, 0xEF, 0xE3, 0xE7
	.byte 0x36, 0x32, 0x3E, 0x3A, 0x26, 0x22, 0x2E, 0x2A
	.byte 0x16, 0x12, 0x1E, 0x1A, 0x06, 0x02, 0x0E, 0x0A
	.byte 0x76, 0x72, 0x7E, 0x7A, 0x66, 0x62, 0x6E, 0x6A
	.byte 0x56, 0x52, 0x5E, 0x5A, 0x46, 0x42, 0x4E, 0x4A
	.byte 0xB6, 0xB2, 0xBE, 0xBA, 0xA6, 0xA2, 0xAE, 0xAA
	.byte 0x96, 0x92, 0x9E, 0x9A, 0x86, 0x82, 0x8E, 0x8A
	.byte 0xF6, 0xF2, 0xFE, 0xFA, 0xE6, 0xE2, 0xEE, 0xEA
	.byte 0xD6, 0xD2, 0xDE, 0xDA, 0xC6, 0xC2, 0xCE, 0xCA
	.byte 0x2D, 0x29, 0x25, 0x21, 0x3D, 0x39, 0x35, 0x31
	.byte 0x0D, 0x09, 0x05, 0x01, 0x1D, 0x19, 0x15, 0x11
	.byte 0x6D, 0x69, 0x65, 0x61, 0x7D, 0x79, 0x75, 0x71
	.byte 0x4D, 0x49, 0x45, 0x41, 0x5D, 0x59, 0x55, 0x51
	.byte 0xAD, 0xA9, 0xA5, 0xA1, 0xBD, 0xB9, 0xB5, 0xB1
	.byte 0x8D, 0x89, 0x85, 0x81, 0x9D, 0x99, 0x95, 0x91
	.byte 0xED, 0xE9, 0xE5, 0xE1, 0xFD, 0xF9, 0xF5, 0xF1
	.byte 0xCD, 0xC9, 0xC5, 0xC1, 0xDD, 0xD9, 0xD5, 0xD1