X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=threefish512_dec_asm.S;fp=threefish512_dec_asm.S;h=5ac9c0d9b003bcfffd5f49f63a0651e63000e711;hb=877bd61c0f8df66c000d0f9f35d87cddc5dd0957;hp=0000000000000000000000000000000000000000;hpb=c239a90de4f9ea98d7c0f5962d200c1a6a6033d1;p=avr-crypto-lib.git diff --git a/threefish512_dec_asm.S b/threefish512_dec_asm.S new file mode 100644 index 0000000..5ac9c0d --- /dev/null +++ b/threefish512_dec_asm.S @@ -0,0 +1,330 @@ +/* threefish512_dec_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2009-03-24
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/******************************************************************************/
+/* C reference for the assembly routine below:
+#define X(a) (((uint64_t*)data)[(a)])
+
+
+static
+void permute_inv8(void* data){
+	uint64_t t;
+	t = X(6);
+	X(6) = X(4);
+	X(4) = X(2);
+	X(2) = X(0);
+	X(0) = t;
+	t = X(7);
+	X(7) = X(3);
+	X(3) = t;
+}
+
+static
+void add_key_8(void* data, const threefish512_ctx_t* ctx, uint8_t s){
+	uint8_t i;
+	for(i=0; i<5; ++i){
+		X(i) -= ctx->k[(s+i)%9];
+	}
+	X(5) -= ctx->k[(s+5)%9] + ctx->t[s%3];
+	X(6) -= ctx->k[(s+6)%9] + ctx->t[(s+1)%3];
+	X(7) -= ctx->k[(s+7)%9] + s;
+}
+
+void threefish512_dec(void* data, const threefish512_ctx_t* ctx){
+	uint8_t i=0,s=18;
+	uint8_t r0[8] = {0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a};
+	uint8_t r1[8] = {0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a};
+	uint8_t r2[8] = {0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62};
+	uint8_t r3[8] = {0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b};
+	do{
+		if(i%4==0){
+			add_key_8(data, ctx, s);
+			--s;
+		}
+		permute_inv8(data);
+		threefish_invmix((uint8_t*)data +  0, r0[i%8]);
+		threefish_invmix((uint8_t*)data + 16, r1[i%8]);
+		threefish_invmix((uint8_t*)data + 32, r2[i%8]);
+		threefish_invmix((uint8_t*)data + 48, r3[i%8]);
+		++i;
+	}while(i!=72);
+	add_key_8(data, ctx, s);
+}
+*/
+I     =  2	/* r2: round counter i */
+S     =  3	/* r3: subkey number s, starts at 18 and counts down to 0 */
+DATA0 =  4	/* r4:r5 (DATA0:DATA1): saved copy of the data pointer */
+DATA1 =  5
+CTX0  =  6	/* r6:r7 (CTX0:CTX1): saved copy of the ctx pointer */
+CTX1  =  7
+IDX0  =  8	/* r8..r15: byte offsets and constants fetched from the flash tables */
+IDX1  =  9
+IDX2  = 10
+IDX3  = 11
+IDX4  = 12
+IDX5  = 13
+IDX6  = 14
+IDX7  = 15
+/*
+ * threefish512_dec: decrypts one block of eight 64-bit words (64 bytes) in
+ * place, following the C reference in the comment above.
+ *
+ * param data: r24:r25  pointer to the block; it is overwritten with the result
+ * param ctx:  r22:r23  pointer to the context; key words are read at byte
+ *                      offsets 0..64 and tweak words at byte offsets 72..88
+ *
+ * r28, r29 and the push_range registers are saved on entry and restored at the
+ * exit label. r0, r22, r24-r27, r30 and r31 are overwritten by this routine.
+ * r1 is used as a constant zero (assumed to be zero on entry -- avr-gcc
+ * convention, confirm for other callers).
+ * NOTE(review): I, S, DATA, CTX, IDX0 and IDX1 (r2..r9) have to survive the
+ * calls to threefish_invmix_asm, which is not defined in this file -- confirm.
+ */
+.global threefish512_dec
+threefish512_dec:
+	push r28
+	push r29
+	push_range 2, 17	/* macro from avr-asm-macros.S (not shown); presumably pushes r2..r17 */
+	movw DATA0, r24	/* keep the data pointer in r4:r5 */
+	movw CTX0, r22	/* keep the ctx pointer in r6:r7 */
+	clr I	/* i = 0 */
+	ldi r26, 18	/* ldi only accepts r16..r31, so s = 18 is loaded via r26 */
+	mov S, r26
+1:	/* top of the round loop */
+	mov r30,  I
+	andi r30, 0x03
+	breq 2f	/* i % 4 == 0: subtract a subkey before this round */
+	rjmp 4f	/* otherwise continue with the permutation */
+2:
+	ldi r30, lo8(threefish512_slut9)
+	ldi r31, hi8(threefish512_slut9)
+	add r30, S
+	adc r31, r1	/* r1 == 0: only propagates the carry */
+	lpm IDX0, Z+	/* IDX0..IDX7 = 8 * ((s + n) % 9) for n = 0..7 */
+	lpm IDX1, Z+
+	lpm IDX2, Z+
+	lpm IDX3, Z+
+	lpm IDX4, Z+
+	lpm IDX5, Z+
+	lpm IDX6, Z+
+	lpm IDX7, Z
+	movw r30, CTX0
+	movw r26, DATA0	/* X walks through the eight data words */
+	add r30, IDX0	/* Z = ctx + byte offset of key word (s + 0) % 9 */
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 0 -= key word; X now points to word 1 */
+	movw r30, CTX0
+	add r30, IDX1
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 1 -= key word (s + 1) % 9 */
+	movw r30, CTX0
+	add r30, IDX2
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 2 -= key word (s + 2) % 9 */
+	movw r30, CTX0
+	add r30, IDX3
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 3 -= key word (s + 3) % 9 */
+	movw r30, CTX0
+	add r30, IDX4
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 4 -= key word (s + 4) % 9 */
+	movw r30, CTX0
+	add r30, IDX5
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 5 -= key word (s + 5) % 9 */
+	movw r30, CTX0
+	add r30, IDX6
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 6 -= key word (s + 6) % 9 */
+	movw r30, CTX0
+	add r30, IDX7
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 7 -= key word (s + 7) % 9; X is now data + 64 */
+
+	/* now the remaining key: tweak words for words 5 and 6, s for word 7 */
+	sbiw r26, 3*8	/* step X back from data + 64 to word 5 */
+	ldi r30, lo8(threefish512_slut3)
+	ldi r31, hi8(threefish512_slut3)
+	add r30, S
+	adc r31, r1
+	lpm IDX0, Z+	/* IDX0 = 8 * (s % 3) */
+	lpm IDX1, Z	/* IDX1 = 8 * ((s + 1) % 3) */
+	movw r30, CTX0
+	adiw r30, 7*8 /* make Z pointing to (extended tweak) */
+	adiw r30, 2*8	/* 72 bytes in total; split because adiw takes at most 63 */
+	movw IDX2, r30	/* remember the tweak base address in r10:r11 */
+	add r30, IDX0
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 5 -= tweak word s % 3 */
+	movw r30, IDX2
+	add r30, IDX1
+	adc r31, r1
+	rcall sub_z_from_x8	/* word 6 -= tweak word (s + 1) % 3 */
+	ld r0, X	/* word 7 -= s: subtract s from the lowest byte ... */
+	sub r0, S
+	st X+, r0
+	ld r0, X	/* ... then propagate the borrow through the other seven bytes */
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	ld r0, X
+	sbc r0, r1
+	st X+, r0
+	tst S
+	brne 3f	/* s != 0: more rounds follow */
+exit:	/* s == 0: the last subkey has been subtracted; restore and return */
+	pop_range 2, 17
+	pop r29
+	pop r28
+	ret
+3:
+	dec S	/* --s */
+4:
+	/* now the permutation: permute_inv8 of the C reference, done as four 8-byte swaps */
+	movw r26, DATA0
+	movw r30, DATA0
+	adiw r30, 6*8
+	rcall xchg_zx8	/* swap words 0 and 6 */
+	movw r26, DATA0
+	adiw r26, 6*8
+	movw r30, DATA0
+	adiw r30, 4*8
+	rcall xchg_zx8	/* swap words 6 and 4 */
+	movw r26, DATA0
+	adiw r26, 2*8
+	movw r30, DATA0
+	adiw r30, 4*8
+	rcall xchg_zx8	/* swap words 2 and 4 */
+	movw r26, DATA0
+	adiw r26, 3*8
+	movw r30, DATA0
+	adiw r30, 7*8
+	rcall xchg_zx8	/* swap words 3 and 7 */
+	/* call mix: fetch the four constants for round i, then one call per 16-byte pair */
+	ldi r30, lo8(threefish512_rc0)
+	ldi r31, hi8(threefish512_rc0)
+	mov r26, I
+	andi r26, 0x07
+	add r30, r26
+	adc r31, r1
+	lpm r22, Z	/* r22 = threefish512_rc0[i % 8] */
+	adiw r30, 8	/* the rc tables are 8 bytes each and laid out back to back */
+	lpm IDX0, Z	/* IDX0 = threefish512_rc1[i % 8] */
+	adiw r30, 8
+	lpm IDX1, Z	/* threefish512_rc2[i % 8] ... */
+	push IDX1	/* ... is parked on the stack so IDX1 can take the rc3 value */
+	adiw r30, 8
+	lpm IDX1, Z	/* IDX1 = threefish512_rc3[i % 8] */
+
+	movw r24, DATA0	/* r24:r25 = data + 0, r22 = rc0 value */
+	call threefish_invmix_asm /* no rcall? */
+	movw r24, DATA0
+	adiw r24, 16
+	mov r22, IDX0	/* r22 = rc1 value */
+	call threefish_invmix_asm /* no rcall? */
+	movw r24, DATA0
+	adiw r24, 32
+	pop r22	/* r22 = rc2 value pushed above */
+	;mov r22, IDX0
+	call threefish_invmix_asm /* no rcall? */
+	movw r24, DATA0
+	adiw r24, 48
+	mov r22, IDX1	/* r22 = rc3 value */
+	call threefish_invmix_asm /* no rcall? */
+	inc I	/* ++i */
+	rjmp 1b	/* the loop is only left through the s == 0 test after a key subtraction */
+
+threefish512_slut9:	/* entry n = 8 * (n % 9): byte offset of key word n % 9; read with lpm */
+	.byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
+	.byte 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
+	.byte 0x38, 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28
+	.byte 0x30, 0x38, 0x40
+threefish512_slut3:	/* entry n = 8 * (n % 3): byte offset of tweak word n % 3; read with lpm */
+	.byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+	.byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+	.byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+	/* rc0..rc3: per-round constants handed to threefish_invmix_asm, indexed by i % 8 */
+threefish512_rc0: .byte 0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a	/* for data + 0 */
+threefish512_rc1: .byte 0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a	/* for data + 16 */
+threefish512_rc2: .byte 0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62	/* for data + 32 */
+threefish512_rc3: .byte 0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b	/* for data + 48 */
+/* sub_z_from_x8: 8-byte subtraction mem[X] -= mem[Z], lowest address first
+ * (sub on the first byte, sbc on the following seven). Both operands are read
+ * with ld, X and Z each end up advanced by 8. r0 and r1 serve as temporaries;
+ * r1 is cleared again before returning. */
+sub_z_from_x8:
+	ld r0, Z+
+	ld r1, X
+	sub r1, r0	/* lowest byte: plain subtract starts the borrow chain */
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0	/* remaining bytes: subtract with borrow (ld/st leave the carry untouched) */
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	ld r0, Z+
+	ld r1, X
+	sbc r1, r0
+	st X+, r1
+	clr r1	/* restore r1 to zero for the callers that use it as a constant */
+	ret
+/* xchg_zx8: exchanges the 8 bytes at X with the 8 bytes at Z; both pointers
+ * end up advanced by 8. Overwrites r24 (CNT), r8 (T0, the same register as
+ * IDX0) and r0 (T1). */
+T0 = IDX0	/* r8 */
+T1 = 0	/* r0 */
+CNT = 24	/* r24: byte counter */
+xchg_zx8:
+	ldi CNT, 8
+1:	ld T0, X
+	ld T1, Z
+	st X+, T1
+	st Z+, T0
+	dec CNT
+	brne 1b
+	ret
+
+
+