X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=cscipher%2Fcscipher_tiny_asm.S;fp=cscipher%2Fcscipher_tiny_asm.S;h=bba78f91f65a0d459efd56b26d6f8fe49be9cc44;hb=0542221a3e793e4455e84b271b07aa0959d5dfb3;hp=0000000000000000000000000000000000000000;hpb=d7838aec004a60bafeb05ca862ae2a413e4aa7dc;p=avr-crypto-lib.git diff --git a/cscipher/cscipher_tiny_asm.S b/cscipher/cscipher_tiny_asm.S new file mode 100644 index 0000000..bba78f9 --- /dev/null +++ b/cscipher/cscipher_tiny_asm.S @@ -0,0 +1,399 @@ +/* cscipher_tiny_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "avr-asm-macros.S" + +/* +uint8_t p(uint8_t a){ + a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0; + a ^= pgm_read_byte(fg_table+(a>>4)) &0x0f; + a ^= pgm_read_byte(fg_table+(a&0xf))&0xf0; + return a; +} +*/ + +fg_table: +.byte 0xfa, 0xd6, 0xb0, 0xb2, 0x7b, 0x5e, 0x71, 0x78 +.byte 0xed, 0xd4, 0xa5, 0xb3, 0xef, 0xdc, 0xe7, 0xf9 + +.global p +p: + ldi r30, lo8(fg_table) + ldi r31, hi8(fg_table) + movw r26, r30 + mov r25, r24 + andi r25, 0x0F + add r30, r25 + adc r31, r1 + lpm r25, Z + andi r25, 0xF0 + eor r24, r25 + + movw r30, r26 + mov r25, r24 + swap r25 + andi r25, 0x0F + add r30, r25 + adc r31, r1 + lpm r25, Z + andi r25, 0x0F + eor r24, r25 + + movw r30, r26 + mov r25, r24 + andi r25, 0x0F + add r30, r25 + adc r31, r1 + lpm r25, Z + andi r25, 0xF0 + eor r24, r25 + clr r25 + ret + +ks_const: +.byte 0x29,0x0d,0x61,0x40,0x9c,0xeb,0x9e,0x8f +.byte 0x1f,0x85,0x5f,0x58,0x5b,0x01,0x39,0x86 +.byte 0x97,0x2e,0xd7,0xd6,0x35,0xae,0x17,0x16 +.byte 0x21,0xb6,0x69,0x4e,0xa5,0x72,0x87,0x08 +.byte 0x3c,0x18,0xe6,0xe7,0xfa,0xad,0xb8,0x89 +.byte 0xb7,0x00,0xf7,0x6f,0x73,0x84,0x11,0x63 +.byte 0x3f,0x96,0x7f,0x6e,0xbf,0x14,0x9d,0xac +.byte 0xa4,0x0e,0x7e,0xf6,0x20,0x4a,0x62,0x30 +.byte 0x03,0xc5,0x4b,0x5a,0x46,0xa3,0x44,0x65 + +CTX_0 = 18 +CTX_1 = 19 +CNT = 17 +.global cscipher_init +cscipher_init: + push CNT + push_range 28, 29 + stack_alloc 24, 28, 29 + adiw r28, 1 + movw r30, r24 + movw CTX_0, r22 + /* copy key to local tmp_key */ + ldi r22, 16 +10: ld r23, Z+ + st Y+, r23 + dec r22 + brne 10b + sbiw r28, 16 + ldi CNT, 0xff +10: /* main loop */ + inc CNT + /* copy part of tmp_key to tmp */ + ldi r23, 8 +11: ldd r22, Y+0 + sbrc CNT, 0 + ldd r22, Y+8 + std Y+16, r22 + adiw r28, 1 + dec r23 + brne 11b + adiw r28, 8 /* Y points at tmp */ + /* xor ks constant into tmp */ + movw r24, r28 + ldi r22, lo8(ks_const) + ldi r23, hi8(ks_const) + mov r21, CNT + swap r21 + lsr r21 + add r22, r21 + adc r23, r1 + clr r21 + ldi r20, 8 + call memxor_P + /* do P transformation */ + ldi r22, 8 +20: ld r24, Y + rcall p + st Y+, r24 + dec r22 + brne 20b + sbiw r28, 8 /* Y points at tmp */ + movw r26, r28 + sbiw r26, 8 + sbrc CNT, 0 + sbiw r26, 8 + /* do T transformation */ + movw r30, CTX_0 + ldi r22, 8 +30: ldi r23, 8 +35: ld r24, Y + rol r24 + rol r21 + st Y+, r24 + dec r23 + brne 35b + sbiw r28, 8 /* Y points at tmp */ + ld r24, X + eor r21, r24 + st X+, r21 + st Z+, r21 + dec r22 + brne 30b + sbiw r28, 16 /* Y points at tmp_key (again) */ + movw CTX_0, r30 + sbrs CNT, 3 + rjmp 10b + stack_free 24 + pop_range 28, 29 + pop CNT + ret + + +round_const: +.byte 0xb7, 0xe1, 0x51, 0x62, 0x8a, 0xed, 0x2a, 0x6a +.byte 0xbf, 0x71, 0x58, 0x80, 0x9c, 0xf4, 0xf3, 0xc7 + +/* +void cscipher_enc(void* buffer, const cscipher_ctx_t* ctx){ + uint8_t i,j,k; + uint8_t tmp[8]; + for(i=0; i<8; ++i){ + for(j=0; j<3; ++j){ + if(j==0){ + memxor(buffer, ctx->keys[i], 8); + }else{ + memxor_P(buffer, round_const+((j==1)?0:8), 8); + } + for(k=0; k<4; ++k){ + ((uint16_t*)tmp)[k] = m(((uint16_t*)buffer)[k]); + } + for(k=0; k<4; ++k){ + ((uint8_t*)buffer)[k] = tmp[2*k]; + ((uint8_t*)buffer)[k+4] = tmp[2*k+1]; + } + } + } + memxor(buffer, ctx->keys[8], 8); +} +*/ +TMP_0 = 2 +TMP_1 = 3 +TMP_2 = 4 +TMP_3 = 5 +TMP_4 = 6 +TMP_5 = 7 +TMP_6 = 8 +TMP_7 = 9 +CTX_0 = 10 +CTX_1 = 11 +CNT_0 = 16 +CNT_1 = 17 +DST_0 = 12 +DST_1 = 13 +SRC_0 = 14 +SRC_1 = 15 +.global cscipher_enc +cscipher_enc: + push_range 2, 17 + push_range 28, 29 + movw r28, r24 + movw CTX_0, r22 + ldi CNT_0, 8 + /* main loop */ +10: ldi CNT_1, 2 + clt + /* sub loop */ +20: ldi r27, 0 + ldi r26, TMP_0 + movw DST_0, r26 + ldi r30, lo8(round_const) + ldi r31, hi8(round_const) + sbrs CNT_1, 0 + adiw r30, 8 + sbrc CNT_1, 1 + movw r30, CTX_0 + movw SRC_0, r30 + ldi r21, 4 + /* xor and m transformation */ +25: ld r24, Y+ + ld r25, Y+ + movw r30, SRC_0 + brts 30f + ld r22, Z+ + ld r23, Z+ + rjmp 35f +30: lpm r22, Z+ + lpm r23, Z+ +35: + movw SRC_0, r30 + eor r24, r22 + eor r25, r23 + + movw r22, r24 + mov r25, r22 + rol r25 + adc r25, r1 + mov r22, r25 + andi r22, 0x55 + eor r22, r24 + eor r22, r23 + eor r23, r25 + mov r24, r23 + rcall p + mov r23, r24 + mov r24, r22 + rcall p + + movw r26, DST_0 + st X+, r24 + st X+, r23 + movw DST_0, r26 + dec r21 + brne 25b + sbrc CNT_1, 1 + movw CTX_0, SRC_0 + sbiw r28, 8 + std Y+0, TMP_0 + std Y+4, TMP_1 + std Y+1, TMP_2 + std Y+5, TMP_3 + std Y+2, TMP_4 + std Y+6, TMP_5 + std Y+3, TMP_6 + std Y+7, TMP_7 + set + dec CNT_1 + brpl 20b + + dec CNT_0 + brne 10b + + movw r24, r28 + movw r22, CTX_0 + clr r21 + ldi r20, 8 + + pop_range 28, 29 + pop_range 2, 17 + rjmp memxor + +/* +void cscipher_dec(void* buffer, const cscipher_ctx_t* ctx){ + uint8_t i=7,j,k; + uint8_t tmp[8]; + memxor(buffer, ctx->keys[8], 8); + do{ + for(j=0; j<3; ++j){ + for(k=0; k<4; ++k){ + tmp[2*k] = ((uint8_t*)buffer)[k]; + tmp[2*k+1] = ((uint8_t*)buffer)[4+k]; + } + for(k=0; k<4; ++k){ + ((uint16_t*)buffer)[k] = m_inv(((uint16_t*)tmp)[k]); + } + if(j==2){ + memxor(buffer, ctx->keys[i], 8); + }else{ + memxor_P(buffer, round_const+((j==1)?0:8), 8); + } + + } + }while(i--); +} + +*/ +.global cscipher_dec +cscipher_dec: + push_range 2, 17 + push_range 28, 29 + movw r28, r24 + movw r26, r22 + adiw r26, 7*8 + adiw r26, 8 + movw CTX_0, r26 + movw r22, r26 + clr r21 + ldi r20, 8 + call memxor + ldi CNT_0, 7 +10: + ldi CNT_1, 3 +20: + clr r27 + ldi r26, TMP_0 + movw DST_0, r26 + ldi r21, 4 +30: + ldd r23, Y+4 + ld r24, Y+ +/* m_inv transformation */ +; mov r23, r25 + rcall p + mov r22, r24 + mov r24, r23 + rcall p + eor r22, r24 + mov r25, r24 + mov r24, r22 + rol r24 + adc r24, r1 + andi r24, 0xaa + eor r24, r22 + mov r22, r24 + rol r22 + adc r22, r1 + eor r25, r22 + + movw r26, DST_0 + st X+, r24 + st X+, r25 + movw DST_0, r26 + dec r21 + brne 30b + sbiw r28, 4 + std Y+0, TMP_0 + std Y+1, TMP_1 + std Y+2, TMP_2 + std Y+3, TMP_3 + std Y+4, TMP_4 + std Y+5, TMP_5 + std Y+6, TMP_6 + std Y+7, TMP_7 + movw r24, r28 + clr r21 + ldi r20, 8 + sbrc CNT_1, 1 + rjmp 40f + movw r26, CTX_0 + sbiw r26, 8 + movw CTX_0, r26 + movw r22, r26 + call memxor + rjmp 45f +40: + ldi r26, lo8(round_const) + ldi r27, hi8(round_const) + sbrc CNT_1, 0 + adiw r26, 8 + movw r22, r26 + call memxor_P +45: + + dec CNT_1 + brne 20b + dec CNT_0 + brpl 10b +90: + pop_range 28, 29 + pop_range 2, 17 + ret