+/* threefish_mix.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/******************************************************************************/
+A0 = 14
+A1 = 15
+A2 = 16
+A3 = 17
+A4 = 18
+A5 = 19
+A6 = 20
+A7 = 21
+/*
+#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * /
+
+#define K(s) (((uint64_t*)key)[(s)])
+#define T(s) (((uint64_t*)tweak)[(s)])
+
+void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){
+ memcpy(ctx->k, key, 4*8);
+ memcpy(ctx->t, tweak, 2*8);
+ uint8_t i;
+ ctx->k[4] = THREEFISH_KEY_CONST;
+ for(i=0; i<4; ++i){
+ ctx->k[4] ^= K(i);
+ }
+ ctx->t[2] = T(0) ^ T(1);
+}
+*/
+/*
+ * param key: r24:r25
+ * param tweak: r22:r23
+ * param ctx: r20:r21
+ */
+.global threefish256_init
+threefish256_init:
+ push_range 14, 17
+ movw r30, r20
+ movw r26, r24
+ ldi r24, 4
+ ldi A7, 0x55
+ mov A6, A7
+ movw A4, A6
+ movw A2, A6
+ movw A0, A6
+1:
+ ld r0, X+
+ st Z+, r0
+ eor A0, r0
+ ld r0, X+
+ st Z+, r0
+ eor A1, r0
+ ld r0, X+
+ st Z+, r0
+ eor A2, r0
+ ld r0, X+
+ st Z+, r0
+ eor A3, r0
+ ld r0, X+
+ st Z+, r0
+ eor A4, r0
+ ld r0, X+
+ st Z+, r0
+ eor A5, r0
+ ld r0, X+
+ st Z+, r0
+ eor A6, r0
+ ld r0, X+
+ st Z+, r0
+ eor A7, r0
+ dec r24
+ brne 1b
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ /* now the tweak */
+ movw r26, r22
+ ld A0, X+
+ ld A1, X+
+ ld A2, X+
+ ld A3, X+
+ ld A4, X+
+ ld A5, X+
+ ld A6, X+
+ ld A7, X+
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ ld r0, X+
+ eor A0, r0
+ st Z+, r0
+ ld r0, X+
+ eor A1, r0
+ st Z+, r0
+ ld r0, X+
+ eor A2, r0
+ st Z+, r0
+ ld r0, X+
+ eor A3, r0
+ st Z+, r0
+ ld r0, X+
+ eor A4, r0
+ st Z+, r0
+ ld r0, X+
+ eor A5, r0
+ st Z+, r0
+ ld r0, X+
+ eor A6, r0
+ st Z+, r0
+ ld r0, X+
+ eor A7, r0
+ st Z+, r0
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ pop_range 14, 17
+ ret
+
+/******************************************************************************/
+/*
+#define X(a) (((uint64_t*)data)[(a)])
+void permute_4(void* data){
+ uint64_t t;
+ t = X(1);
+ X(1) = X(3);
+ X(3) = t;
+}
+void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * /
+ X(0) += ctx->k[(s+0)%5];
+ X(1) += ctx->k[(s+1)%5] + ctx->t[s%3];
+ X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
+ X(3) += ctx->k[(s+3)%5] + s;
+}
+void threefish256_enc(void* data, threefish256_ctx_t* ctx){
+ uint8_t i=0,s=0;
+ uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59};
+ uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50};
+ do{
+ if(i%4==0){
+ add_key_4(data, ctx, s);
+ ++s;
+ }
+ threefish_mix(data, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
+ permute_4(data);
+ ++i;
+ }while(i!=72);
+ add_key_4(data, ctx, s);
+}
+*/
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+/*
+ * param data: r24:r25
+ * param ctx: r22:r23
+ */
+.global threefish256_enc
+threefish256_enc:
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ clr S
+1:
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+2:
+ ldi r30, lo8(threefish256_slut5)
+ ldi r31, hi8(threefish256_slut5)
+ mov r26, S
+ add r30, r26
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall add_z_to_x8
+
+ /* now the remaining key */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish256_slut3)
+ ldi r31, hi8(threefish256_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ adiw r30, 5*8
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ ld r0, X
+ add r0, S
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ inc S
+ mov r26, S
+ cpi r26, 19
+ brmi 4f
+exit:
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+4:
+ /* call mix */
+ ldi r30, lo8(threefish256_rc0)
+ ldi r31, hi8(threefish256_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ movw r24, DATA0
+ call threefish_mix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_mix_asm /* no rcall? */
+ /* now the permutation */
+ movw r26, DATA0
+ adiw r26, 8
+ movw r30, r26
+ adiw r30, 16
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ inc I
+ rjmp 1b
+
+threefish256_slut5:
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
+ .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+threefish256_slut3:
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+;threefish256_rc0: .byte 5, 36, 13, 58, 26, 53, 11, 59
+;threefish256_rc1: .byte 56, 28, 46, 44, 20, 35, 42, 50
+threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73
+threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62
+
+add_z_to_x8:
+ ld r0, Z+
+ ld r1, X
+ add r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ clr r1
+ ret
+
+
+
+
+
+
+
+
+
+