/* cscipher_tiny_asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "avr-asm-macros.S"

/*
 * General notes for this file:
 *  - r1 is used as a constant zero in every "adc rX, r1" carry propagation,
 *    so all routines assume r1 == 0 on entry.
 *  - push_range, pop_range, stack_alloc and stack_free are macros expected to
 *    come from avr-asm-macros.S; memxor and memxor_P are external routines.
 *    Their exact behaviour is not visible here.
 *  - All lookup tables live next to the code and are read with lpm.
 */

/*
uint8_t p(uint8_t a){
	a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0;
	a ^= pgm_read_byte(fg_table+(a>>4)) &0x0f;
	a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0;
	return a;
}
*/

fg_table:
.byte 0xfa, 0xd6, 0xb0, 0xb2, 0x7b, 0x5e, 0x71, 0x78
.byte 0xed, 0xd4, 0xa5, 0xb3, 0xef, 0xdc, 0xe7, 0xf9

/*
 * p: byte substitution built from three nibble lookups in fg_table.
 *   in:       r24 = a
 *   out:      r24 = p(a), r25 = 0
 *   modifies: r26:r27 (X, keeps the table base), r30:r31 (Z)
 * The callers in this file keep live values in r21, r22 and r23 across calls
 * to p; p does not touch those registers.
 */
.global p
p:
	ldi r30, lo8(fg_table)
	ldi r31, hi8(fg_table)
	movw r26, r30            /* keep table base in X for the later lookups */
	/* a ^= fg_table[a & 0x0f] & 0xf0 */
	mov r25, r24
	andi r25, 0x0F
	add r30, r25
	adc r31, r1
	lpm r25, Z
	andi r25, 0xF0
	eor r24, r25
	/* a ^= fg_table[a >> 4] & 0x0f */
	movw r30, r26
	mov r25, r24
	swap r25
	andi r25, 0x0F
	add r30, r25
	adc r31, r1
	lpm r25, Z
	andi r25, 0x0F
	eor r24, r25
	/* a ^= fg_table[a & 0x0f] & 0xf0 */
	movw r30, r26
	mov r25, r24
	andi r25, 0x0F
	add r30, r25
	adc r31, r1
	lpm r25, Z
	andi r25, 0xF0
	eor r24, r25
	clr r25
	ret

/* key schedule constants: 9 rows of 8 bytes, one row per generated subkey */
ks_const:
.byte 0x29,0x0d,0x61,0x40,0x9c,0xeb,0x9e,0x8f
.byte 0x1f,0x85,0x5f,0x58,0x5b,0x01,0x39,0x86
.byte 0x97,0x2e,0xd7,0xd6,0x35,0xae,0x17,0x16
.byte 0x21,0xb6,0x69,0x4e,0xa5,0x72,0x87,0x08
.byte 0x3c,0x18,0xe6,0xe7,0xfa,0xad,0xb8,0x89
.byte 0xb7,0x00,0xf7,0x6f,0x73,0x84,0x11,0x63
.byte 0x3f,0x96,0x7f,0x6e,0xbf,0x14,0x9d,0xac
.byte 0xa4,0x0e,0x7e,0xf6,0x20,0x4a,0x62,0x30
.byte 0x03,0xc5,0x4b,0x5a,0x46,0xa3,0x44,0x65

CTX_0 = 18
CTX_1 = 19
CNT   = 17

/*
 * cscipher_init: derive nine 8-byte subkeys from a 16-byte key.
 *   in:  r25:r24 = pointer to the key (16 bytes are read)
 *        r23:r22 = pointer to the context (9 * 8 = 72 bytes are written)
 * Uses 24 bytes of stack: tmp_key[16] followed by tmp[8].
 *
 * NOTE(review): the context write pointer is kept in r18:r19 across
 * "call memxor_P"; this only works if memxor_P leaves r18:r19 untouched.
 * Confirm against the memxor_P implementation.
 * NOTE(review): "adiw r28, 1" after stack_alloc suggests the macro leaves Y
 * one byte below the allocated area; confirm in avr-asm-macros.S.
 */
.global cscipher_init
cscipher_init:
	push CNT
	push_range 28, 29
	stack_alloc 24, 28, 29
	adiw r28, 1
	movw r30, r24
	movw CTX_0, r22
	/* copy key to local tmp_key */
	ldi r22, 16
10:
	ld r23, Z+
	st Y+, r23
	dec r22
	brne 10b
	sbiw r28, 16             /* Y points at tmp_key */
	ldi CNT, 0xff            /* becomes 0 on the first inc below */
10: /* main loop, runs for CNT = 0..8 */
	inc CNT
	/* copy part of tmp_key to tmp: low half if CNT is even, high half if odd */
	ldi r23, 8
11:
	ldd r22, Y+0
	sbrc CNT, 0
	ldd r22, Y+8
	std Y+16, r22
	adiw r28, 1
	dec r23
	brne 11b
	adiw r28, 8              /* Y points at tmp */
	/* xor ks constant into tmp: memxor_P(tmp, ks_const + 8*CNT, 8) */
	movw r24, r28
	ldi r22, lo8(ks_const)
	ldi r23, hi8(ks_const)
	mov r21, CNT
	swap r21
	lsr r21                  /* r21 = CNT * 8 (CNT <= 8) */
	add r22, r21
	adc r23, r1
	clr r21
	ldi r20, 8
	call memxor_P
	/* do P transformation on each byte of tmp */
	ldi r22, 8
20:
	ld r24, Y
	rcall p
	st Y+, r24
	dec r22
	brne 20b
	sbiw r28, 8              /* Y points at tmp */
	/* X = the half of tmp_key that was NOT copied into tmp */
	movw r26, r28
	sbiw r26, 8
	sbrc CNT, 0
	sbiw r26, 8
	/* do T transformation:
	 * each pass shifts every tmp byte left by one and collects the eight
	 * shifted-out top bits in r21; that byte is xored into tmp_key via X
	 * and the result is also stored to the context via Z.
	 * The bits that "rol r24" shifts into the tmp bytes are never extracted
	 * within the 8 passes, so their value does not matter. */
	movw r30, CTX_0
	ldi r22, 8
30:
	ldi r23, 8
35:
	ld r24, Y
	rol r24
	rol r21
	st Y+, r24
	dec r23
	brne 35b
	sbiw r28, 8              /* Y points at tmp */
	ld r24, X
	eor r21, r24
	st X+, r21
	st Z+, r21
	dec r22
	brne 30b
	sbiw r28, 16             /* Y points at tmp_key (again) */
	movw CTX_0, r30          /* remember where the next subkey goes */
	sbrs CNT, 3              /* stop once CNT == 8 */
	rjmp 10b
	stack_free 24
	pop_range 28, 29
	pop CNT
	ret

/* round constants: row 0 is used for j == 1, row 1 for j == 2 (enc numbering) */
round_const:
.byte 0xb7, 0xe1, 0x51, 0x62, 0x8a, 0xed, 0x2a, 0x6a
.byte 0xbf, 0x71, 0x58, 0x80, 0x9c, 0xf4, 0xf3, 0xc7

/*
void cscipher_enc(void *buffer, const cscipher_ctx_t *ctx){
	uint8_t i,j,k;
	uint8_t tmp[8];
	for(i=0; i<8; ++i){
		for(j=0; j<3; ++j){
			if(j==0){
				memxor(buffer, ctx->keys[i], 8);
			}else{
				memxor_P(buffer, round_const+((j==1)?0:8), 8);
			}
			for(k=0; k<4; ++k){
				((uint16_t*)tmp)[k] = m(((uint16_t*)buffer)[k]);
			}
			for(k=0; k<4; ++k){
				((uint8_t*)buffer)[k]   = tmp[2*k];
				((uint8_t*)buffer)[k+4] = tmp[2*k+1];
			}
		}
	}
	memxor(buffer, ctx->keys[8], 8);
}
*/

/*
 * Register assignment shared by cscipher_enc and cscipher_dec.
 * TMP_0..TMP_7 (r2..r9) hold the 8-byte tmp[] array. They are written through
 * the X pointer loaded with data address TMP_0 (= 2) and read back as
 * registers, so this code requires the register file to be mapped at data
 * addresses 0..31.
 * DST (r12:r13) and SRC (r14:r15) park the X and Z pointers across calls to
 * p, which overwrites both.
 */
TMP_0 = 2
TMP_1 = 3
TMP_2 = 4
TMP_3 = 5
TMP_4 = 6
TMP_5 = 7
TMP_6 = 8
TMP_7 = 9
CTX_0 = 10
CTX_1 = 11
CNT_0 = 16
CNT_1 = 17
DST_0 = 12
DST_1 = 13
SRC_0 = 14
SRC_1 = 15

/*
 * cscipher_enc: encrypt one 8-byte block in place.
 *   in:  r25:r24 = buffer, r23:r22 = ctx (subkeys, 8 bytes each)
 * CNT_0 counts the 8 rounds; CNT_1 runs 2,1,0 and corresponds to j = 0,1,2
 * of the reference code above. The T flag selects the source of the xor
 * operand: clear = subkey in RAM (ld), set = round constant in flash (lpm).
 * The final subkey xor is done by tail-jumping into memxor.
 */
.global cscipher_enc
cscipher_enc:
	push_range 2, 17
	push_range 28, 29
	movw r28, r24
	movw CTX_0, r22
	ldi CNT_0, 8
	/* main loop */
10:
	ldi CNT_1, 2
	clt                      /* first sub round reads the subkey from RAM */
	/* sub loop */
20:
	ldi r27, 0
	ldi r26, TMP_0           /* X = data address of r2, i.e. tmp[0] */
	movw DST_0, r26
	/* select xor source: CNT_1 == 2 -> subkey, 1 -> round_const, 0 -> round_const+8 */
	ldi r30, lo8(round_const)
	ldi r31, hi8(round_const)
	sbrs CNT_1, 0
	adiw r30, 8
	sbrc CNT_1, 1
	movw r30, CTX_0
	movw SRC_0, r30
	ldi r21, 4
	/* xor and m transformation, one 16-bit word per pass */
25:
	ld r24, Y+
	ld r25, Y+
	movw r30, SRC_0
	brts 30f
	ld r22, Z+
	ld r23, Z+
	rjmp 35f
30:
	lpm r22, Z+
	lpm r23, Z+
35:
	movw SRC_0, r30
	eor r24, r22
	eor r25, r23
	movw r22, r24            /* r22 = first byte, r23 = second byte */
	mov r25, r22
	/* r25 = first byte rotated left by one; lsl (not rol) so the result
	 * does not depend on the carry flag left by earlier instructions */
	lsl r25
	adc r25, r1
	mov r22, r25
	andi r22, 0x55
	eor r22, r24
	eor r22, r23             /* r22 = (rotl & 0x55) ^ first ^ second */
	eor r23, r25             /* r23 = rotl ^ second */
	mov r24, r23
	rcall p
	mov r23, r24
	mov r24, r22
	rcall p
	movw r26, DST_0          /* p overwrote X, reload the tmp write pointer */
	st X+, r24
	st X+, r23
	movw DST_0, r26
	dec r21
	brne 25b
	sbrc CNT_1, 1
	movw CTX_0, SRC_0        /* subkey consumed: advance ctx pointer by 8 */
	sbiw r28, 8
	/* buffer[k] = tmp[2*k]; buffer[k+4] = tmp[2*k+1] */
	std Y+0, TMP_0
	std Y+4, TMP_1
	std Y+1, TMP_2
	std Y+5, TMP_3
	std Y+2, TMP_4
	std Y+6, TMP_5
	std Y+3, TMP_6
	std Y+7, TMP_7
	set                      /* remaining sub rounds read constants from flash */
	dec CNT_1
	brpl 20b
	dec CNT_0
	brne 10b
	/* memxor(buffer, ctx pointer after eight subkeys, 8) as a tail call */
	movw r24, r28
	movw r22, CTX_0
	clr r21
	ldi r20, 8
	pop_range 28, 29
	pop_range 2, 17
	rjmp memxor

/*
void cscipher_dec(void *buffer, const cscipher_ctx_t *ctx){
	uint8_t i=7,j,k;
	uint8_t tmp[8];
	memxor(buffer, ctx->keys[8], 8);
	do{
		for(j=0; j<3; ++j){
			for(k=0; k<4; ++k){
				tmp[2*k]   = ((uint8_t*)buffer)[k];
				tmp[2*k+1] = ((uint8_t*)buffer)[4+k];
			}
			for(k=0; k<4; ++k){
				((uint16_t*)buffer)[k] = m_inv(((uint16_t*)tmp)[k]);
			}
			if(j==2){
				memxor(buffer, ctx->keys[i], 8);
			}else{
				memxor_P(buffer, round_const+((j==1)?0:8), 8);
			}
		}
	}while(i--);
}
*/

/*
 * cscipher_dec: decrypt one 8-byte block in place.
 *   in:  r25:r24 = buffer, r23:r22 = ctx (subkeys, 8 bytes each)
 * CNT_0 counts 7..0 (8 rounds); CNT_1 runs 3,2,1 and corresponds to
 * j = 0,1,2 of the reference code above.
 * The values kept across the memxor / memxor_P calls (Y, CTX, CNT_0, CNT_1)
 * are in r28:r29 and r10..r17, so those routines must leave these registers
 * unchanged.
 */
.global cscipher_dec
cscipher_dec:
	push_range 2, 17
	push_range 28, 29
	movw r28, r24
	/* CTX = ctx + 64 (ninth subkey); r24 still holds buffer for memxor */
	movw r26, r22
	adiw r26, 7*8
	adiw r26, 8
	movw CTX_0, r26
	movw r22, r26
	clr r21
	ldi r20, 8
	call memxor
	ldi CNT_0, 7
10:
	ldi CNT_1, 3
20:
	clr r27
	ldi r26, TMP_0           /* X = data address of r2, i.e. tmp[0] */
	movw DST_0, r26
	ldi r21, 4
30:
	/* de-interleave: r24 = buffer[k], r23 = buffer[k+4] */
	ldd r23, Y+4
	ld r24, Y+
	/* m_inv transformation */
	rcall p
	mov r22, r24             /* r22 = p(buffer[k]) */
	mov r24, r23
	rcall p                  /* r24 = p(buffer[k+4]) */
	eor r22, r24
	mov r25, r24
	mov r24, r22
	/* rotate left by one; lsl (not rol) so the result does not depend on
	 * the carry flag left by earlier instructions */
	lsl r24
	adc r24, r1
	andi r24, 0xaa
	eor r24, r22
	mov r22, r24
	lsl r22
	adc r22, r1
	eor r25, r22
	movw r26, DST_0          /* p overwrote X, reload the tmp write pointer */
	st X+, r24
	st X+, r25
	movw DST_0, r26
	dec r21
	brne 30b
	sbiw r28, 4
	std Y+0, TMP_0
	std Y+1, TMP_1
	std Y+2, TMP_2
	std Y+3, TMP_3
	std Y+4, TMP_4
	std Y+5, TMP_5
	std Y+6, TMP_6
	std Y+7, TMP_7
	/* common arguments: destination = buffer, length = 8 */
	movw r24, r28
	clr r21
	ldi r20, 8
	sbrc CNT_1, 1
	rjmp 40f
	/* CNT_1 == 1: step back to the previous subkey and xor it in */
	movw r26, CTX_0
	sbiw r26, 8
	movw CTX_0, r26
	movw r22, r26
	call memxor
	rjmp 45f
40:
	/* CNT_1 == 3 -> round_const+8, CNT_1 == 2 -> round_const */
	ldi r26, lo8(round_const)
	ldi r27, hi8(round_const)
	sbrc CNT_1, 0
	adiw r26, 8
	movw r22, r26
	call memxor_P
45:
	dec CNT_1
	brne 20b
	dec CNT_0
	brpl 10b
90:
	pop_range 28, 29
	pop_range 2, 17
	ret