printf("%+d: %4d\n", i, ss_hist[i])
end
+puts "\ntransformed:"
(0..shift_values.length-1).each{|i|
+ puts " for 256 bit:" if i==0
+ puts " for 512 bit:" if i==16
+ puts " for 1024 bit:" if i==16+32
+
a = transform_shift(shift_values[i])
a[0] = transform_singleshift(a[0])
printf("0x%01x%01x, ", a[1], a[0])
puts("") if (i%8==7)
}
+
+
+puts "\ntransformed (decryption):"
+(0..shift_values.length-1).each{|i|
+ puts " for 256 bit:" if i==0
+ puts " for 512 bit:" if i==16
+ puts " for 1024 bit:" if i==16+32
+
+ a = transform_shift(shift_values[(i/8)*8+7-(i%8)])
+ a[0] = transform_singleshift(a[0])
+ printf("0x%01x%01x, ", a[1], a[0])
+ puts("") if (i%8==7)
+}
$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc_asm.o threefish1024_enc_asm.o\
- threefish_mix.o threefish_mix_4c.o threefish_invmix_c.o\
- threefish256_dec.o threefish512_dec.o threefish1024_dec.o
+ threefish_mix.o threefish_invmix.o \
+ threefish256_dec_asm.o threefish512_dec_asm.o threefish1024_dec_asm.o
$(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \
nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 32, 4, 16);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ threefish256_dec(data, &ctx);
+ cli_hexdump_block(data, 32, 4, 16);
+
/* second test */
for(i=0; i<32; ++i){
key[i] = 0x10+i;
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 32, 4, 16);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ threefish256_dec(data, &ctx);
+ cli_hexdump_block(data, 32, 4, 16);
}
void testrun_stdtest_threefish512(void){
threefish512_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 64, 4, 16);
+ threefish512_dec(data, &ctx);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ cli_hexdump_block(data, 64, 4, 16);
+
for(i=0; i<64; ++i){
key[i] = 0x10+i;
threefish512_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 64, 4, 16);
+ threefish512_dec(data, &ctx);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ cli_hexdump_block(data, 64, 4, 16);
+
}
void testrun_stdtest_threefish1024(void){
threefish1024_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 128, 4, 16);
+ threefish1024_dec(data, &ctx);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ cli_hexdump_block(data, 128, 4, 16);
for(i=0; i<128; ++i){
key[i] = 0x10+i;
threefish1024_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump_block(data, 128, 4, 16);
+ threefish1024_dec(data, &ctx);
+ cli_putstr_P(PSTR("\r\ndecipher: "));
+ cli_hexdump_block(data, 128, 4, 16);
}
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
+ startTimer(1);
+ threefish256_dec(data, &ctx);
+ t = stopTimer();
+ cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
+ ultoa((unsigned long)t, str, 10);
+ cli_putstr(str);
cli_putstr_P(PSTR("\r\n"));
}
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
+ startTimer(1);
+ threefish512_dec(data, &ctx);
+ t = stopTimer();
+ cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
+ ultoa((unsigned long)t, str, 10);
+ cli_putstr(str);
+
cli_putstr_P(PSTR("\r\n"));
}
ultoa((unsigned long)t, str, 10);
cli_putstr(str);
+ startTimer(1);
+ threefish1024_dec(data, &ctx);
+ t = stopTimer();
+ cli_putstr_P(PSTR("\r\n\tdecrypt time: "));
+ ultoa((unsigned long)t, str, 10);
+ cli_putstr(str);
+
cli_putstr_P(PSTR("\r\n"));
}
--- /dev/null
+/* threefish1024_dec_asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-24
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/******************************************************************************/
+/*
+void permute_inv16(void* data){
+ uint64_t t;
+ t = X(15);
+ X(15) = X(7);
+ X(7) = X(9);
+ X(9) = X(1);
+ X(1) = t;
+ t = X(11);
+ X(11) = X(5);
+ X(5) = X(13);
+ X(13) = X(3);
+ X(3) = t;
+ t = X(4);
+ X(4) = X(6);
+ X(6) = t;
+ t = X(14);
+ X(14) = X(12);
+ X(12) = X(10);
+ X(10) = X(8);
+ X(8) = t;
+}
+void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){
+ uint8_t i;
+ for(i=0; i<13; ++i){
+ X(i) -= ctx->k[(s+i)%17];
+ }
+ X(13) -= ctx->k[(s+13)%17] + ctx->t[s%3];
+ X(14) -= ctx->k[(s+14)%17] + ctx->t[(s+1)%3];
+ X(15) -= ctx->k[(s+15)%17] + s;
+}
+void threefish1024_dec(void* data, const threefish1024_ctx_t* ctx){
+ uint8_t i=0,s=20;
+ uint8_t r0[8] = {0x69, 0x72, 0x21, 0x34, 0x42, 0x41, 0x31, 0x79};
+ uint8_t r1[8] = {0x61, 0x19, 0x1a, 0x19, 0x53, 0x10, 0x31, 0x53};
+ uint8_t r2[8] = {0x33, 0x40, 0x22, 0x69, 0x31, 0x22, 0x6a, 0x5b};
+ uint8_t r3[8] = {0x72, 0x6b, 0x31, 0x60, 0x74, 0x71, 0x2b, 0x50};
+ uint8_t r4[8] = {0x5b, 0x23, 0x53, 0x63, 0x54, 0x3b, 0x2a, 0x20};
+ uint8_t r5[8] = {0x60, 0x22, 0x52, 0x11, 0x11, 0x14, 0x2b, 0x3a};
+ uint8_t r6[8] = {0x7b, 0x02, 0x50, 0x43, 0x73, 0x40, 0x64, 0x5a};
+ uint8_t r7[8] = {0x70, 0x70, 0x29, 0x51, 0x42, 0x7a, 0x71, 0x14};
+
+ do{
+ if(i%4==0){
+ add_key_16(data, ctx, s);
+ --s;
+ }
+ permute_inv16(data);
+ threefish_invmix((uint8_t*)data + 0, r0[i%8]);
+ threefish_invmix((uint8_t*)data + 16, r1[i%8]);
+ threefish_invmix((uint8_t*)data + 32, r2[i%8]);
+ threefish_invmix((uint8_t*)data + 48, r3[i%8]);
+ threefish_invmix((uint8_t*)data + 64, r4[i%8]);
+ threefish_invmix((uint8_t*)data + 80, r5[i%8]);
+ threefish_invmix((uint8_t*)data + 96, r6[i%8]);
+ threefish_invmix((uint8_t*)data +112, r7[i%8]);
+ ++i;
+ }while(i!=80);
+ add_key_16(data, ctx, s);
+}
+*/
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+IDX4 = 12
+IDX5 = 13
+IDX6 = 14
+IDX7 = 15
+
+/*
+ * threefish1024_dec: decrypts one Threefish-1024 block in place; assembly
+ * counterpart of the C reference shown in the comment above.
+ *
+ * param data: r24:r25 (pointer to the 16 x 64-bit block words)
+ * param ctx: r22:r23 (pointer to the context; 17 key words at offset 0,
+ * tweak words starting at offset 17*8, as addressed below)
+ *
+ * Register roles: I (r2) = round counter, S (r3) = subkey counter starting
+ * at 20, DATA0:DATA1 / CTX0:CTX1 = copies of the arguments, IDX0..IDX7 =
+ * scratch. r1 is used as a constant zero in every adc/sbc below.
+ */
+.global threefish1024_dec
+threefish1024_dec:
+ /* save Y and the low registers that this routine and
+ threefish_invmix_asm overwrite (push_range comes from
+ avr-asm-macros.S; presumably it pushes r2..r17) */
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ /* S lives in r3, which ldi cannot address, so load through r26 */
+ ldi r26, 20
+ mov S, r26
+1:
+ /* round loop head: a subkey is subtracted only when I % 4 == 0 */
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+2:
+ /* fetch the byte offsets of k[(S+0)%17] .. k[(S+7)%17] */
+ ldi r30, lo8(threefish1024_slut17)
+ ldi r31, hi8(threefish1024_slut17)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z+
+ lpm IDX4, Z+
+ lpm IDX5, Z+
+ lpm IDX6, Z+
+ lpm IDX7, Z
+ /* X walks over the data words; every sub_z_from_x8 call subtracts
+ the 64-bit value at Z from the one at X and advances X by 8 */
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX4
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX5
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX6
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX7
+ adc r31, r1
+ rcall sub_z_from_x8
+ /* second half: offsets of k[(S+8)%17] .. k[(S+15)%17] for words 8..15 */
+ ldi r30, lo8(threefish1024_slut17)
+ ldi r31, hi8(threefish1024_slut17)
+ add r30, S
+ adc r31, r1
+ adiw r30, 8
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z+
+ lpm IDX4, Z+
+ lpm IDX5, Z+
+ lpm IDX6, Z+
+ lpm IDX7, Z
+ movw r30, CTX0
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX4
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX5
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX6
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX7
+ adc r31, r1
+ rcall sub_z_from_x8
+ /* now the remaining key: X is one past word 15, step back to word 13
+ and subtract t[S%3] from word 13, t[(S+1)%3] from word 14 */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish1024_slut3)
+ ldi r31, hi8(threefish1024_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ /* Z = ctx + 17*8, built in three steps because the adiw immediate
+ cannot exceed 63 */
+ adiw r30, 7*8 /* make Z pointing to (extended tweak) */
+ adiw r30, 7*8
+ adiw r30, 3*8
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ /* word 15 -= S: subtract S from the low byte, then propagate the
+ borrow through the remaining seven bytes (ld/st leave flags alone) */
+ ld r0, X
+ sub r0, S
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ /* subkey 0 was the last one: restore registers and return */
+ tst S
+ brne 3f
+exit:
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+3:
+ dec S
+4:
+ /* now the permutation: each 4-cycle of permute_inv16 is carried out
+ as three 8-byte swaps; word offsets above 63 bytes need two adiw */
+ movw r26, DATA0 /* X1 <-> X15 */
+ adiw r26, 1*8
+ movw r30, DATA0
+ adiw r30, 7*8+4
+ adiw r30, 7*8+4
+ rcall xchg_zx8
+ movw r26, DATA0 /* X15 <-> X7 */
+ adiw r26, 7*8+4
+ adiw r26, 7*8+4
+ movw r30, DATA0
+ adiw r30, 7*8
+ rcall xchg_zx8
+ movw r26, DATA0 /* X9 <-> X7 */
+ adiw r26, 7*8
+ adiw r26, 2*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ rcall xchg_zx8
+ /* --- */
+ movw r26, DATA0 /* X3 <-> X11 */
+ adiw r26, 3*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ adiw r30, 4*8
+ rcall xchg_zx8
+ movw r26, DATA0 /* X11 <-> X5 */
+ adiw r26, 7*8
+ adiw r26, 4*8
+ movw r30, DATA0
+ adiw r30, 5*8
+ rcall xchg_zx8
+ movw r26, DATA0 /* X13 <-> X5 */
+ adiw r26, 7*8
+ adiw r26, 6*8
+ movw r30, DATA0
+ adiw r30, 5*8
+ rcall xchg_zx8
+ /* --- */
+ movw r26, DATA0 /* X8 <-> X14 */
+ adiw r26, 7*8
+ adiw r26, 1*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ adiw r30, 7*8
+ rcall xchg_zx8
+ movw r26, DATA0 /* X14 <-> X12 */
+ adiw r26, 7*8
+ adiw r26, 7*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ adiw r30, 5*8
+ rcall xchg_zx8
+ movw r26, DATA0 /* X10 <-> X12 */
+ adiw r26, 7*8
+ adiw r26, 3*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ adiw r30, 5*8
+ rcall xchg_zx8
+ /* --- */
+ movw r26, DATA0 /* X4 <-> X6 */
+ adiw r26, 4*8
+ movw r30, DATA0
+ adiw r30, 6*8
+ rcall xchg_zx8
+
+ /* call mix: fetch the rotation constants rc0..rc7 for round I % 8;
+ the eight tables are laid out back to back, 8 bytes apart */
+ ldi r30, lo8(threefish1024_rc0)
+ ldi r31, hi8(threefish1024_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ adiw r30, 8
+ lpm IDX1, Z
+ adiw r30, 8
+ lpm IDX2, Z
+ adiw r30, 8
+ lpm IDX3, Z
+ adiw r30, 8
+ lpm IDX4, Z
+ adiw r30, 8
+ lpm IDX5, Z
+ adiw r30, 8
+ lpm IDX6, Z
+ /* IDX2..IDX6 (r10..r14) are overwritten by threefish_invmix_asm,
+ which uses r10..r17, so rc3..rc7 travel via the stack; they are
+ pushed in reverse so that they pop in the order rc3, rc4, ... rc7 */
+ push IDX6
+ push IDX5
+ push IDX4
+ push IDX3
+ push IDX2
+
+ /* one inverse mix per pair of words: data+0, +16, ... +112 */
+ movw r24, DATA0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 32
+ mov r22, IDX1
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 48
+ pop r22
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 63
+ adiw r24, 1
+ pop r22
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 63
+ adiw r24, 17
+ pop r22
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 63
+ adiw r24, 33
+ pop r22
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 63
+ adiw r24, 49
+ pop r22
+ call threefish_invmix_asm /* no rcall? */
+ inc I
+9:
+ rjmp 1b
+
+threefish1024_slut17:
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
+ .byte 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78
+ .byte 0x80, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
+ .byte 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70
+ .byte 0x78, 0x80, 0x00, 0x08, 0x10
+threefish1024_slut3:
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10
+ .byte 0x00
+
+threefish1024_rc0: .byte 0x69, 0x72, 0x21, 0x34, 0x42, 0x41, 0x31, 0x79
+threefish1024_rc1: .byte 0x61, 0x19, 0x1a, 0x19, 0x53, 0x10, 0x31, 0x53
+threefish1024_rc2: .byte 0x33, 0x40, 0x22, 0x69, 0x31, 0x22, 0x6a, 0x5b
+threefish1024_rc3: .byte 0x72, 0x6b, 0x31, 0x60, 0x74, 0x71, 0x2b, 0x50
+threefish1024_rc4: .byte 0x5b, 0x23, 0x53, 0x63, 0x54, 0x3b, 0x2a, 0x20
+threefish1024_rc5: .byte 0x60, 0x22, 0x52, 0x11, 0x11, 0x14, 0x2b, 0x3a
+threefish1024_rc6: .byte 0x7b, 0x02, 0x50, 0x43, 0x73, 0x40, 0x64, 0x5a
+threefish1024_rc7: .byte 0x70, 0x70, 0x29, 0x51, 0x42, 0x7a, 0x71, 0x14
+
+/*
+ * sub_z_from_x8: 64-bit little-endian subtraction, (X) -= (Z).
+ * in: X = minuend/destination, Z = subtrahend
+ * out: X and Z both advanced by 8
+ * uses r0 and r1 as scratch; r1 is cleared again before returning
+ */
+sub_z_from_x8:
+ /* lowest byte: plain subtract, produces the first borrow */
+ ld r0, Z+
+ ld r1, X
+ sub r1, r0
+ st X+, r1
+ /* bytes 1..7: subtract with borrow (ld/st do not touch the flags) */
+ .rept 7
+ ld r0, Z+
+ ld r1, X
+ sbc r1, r0
+ st X+, r1
+ .endr
+ clr r1
+ ret
+
+T0 = IDX0
+T1 = 0
+CNT = 24
+/*
+ * xchg_zx8: swaps the 8 bytes at X with the 8 bytes at Z.
+ * Both pointers end up advanced by 8; T0, T1 and CNT are overwritten.
+ */
+xchg_zx8:
+ ldi CNT, 8
+.Lxchg_zx8_next:
+ ld T0, X
+ ld T1, Z
+ st X+, T1
+ st Z+, T0
+ dec CNT
+ brne .Lxchg_zx8_next
+ ret
+
+
+
--- /dev/null
+/* threefish256_dec_asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/******************************************************************************/
+/*
+void permute_4(void* data){
+ uint64_t t;
+ t = X(1);
+ X(1) = X(3);
+ X(3) = t;
+}
+void add_key_4(void* data, const threefish256_ctx_t* ctx, uint8_t s){
+ X(0) -= ctx->k[(s+0)%5];
+ X(1) -= ctx->k[(s+1)%5] + ctx->t[s%3];
+ X(2) -= ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
+ X(3) -= ctx->k[(s+3)%5] + s;
+}
+void threefish256_dec(void* data, const threefish256_ctx_t* ctx){
+ uint8_t i=0,s=18;
+ uint8_t r0[8] = {0x73, 0x13, 0x7b, 0x32, 0x72, 0x2b, 0x44, 0x1b};
+ uint8_t r1[8] = {0x62, 0x52, 0x43, 0x24, 0x54, 0x6a, 0x34, 0x70};
+ do{
+ if(i%4==0){
+ add_key_4(data, ctx, s);
+ --s;
+ }
+ permute_4(data);
+ threefish_invmix(data, r0[i%8]);
+ threefish_invmix((uint8_t*)data + 16, r1[i%8]);
+ ++i;
+ }while(i!=72);
+ add_key_4(data, ctx, s);
+}
+*/
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+/*
+ * threefish256_dec: decrypts one Threefish-256 block in place; assembly
+ * counterpart of the C reference shown in the comment above.
+ *
+ * param data: r24:r25 (pointer to the 4 x 64-bit block words)
+ * param ctx: r22:r23 (pointer to the context; 5 key words at offset 0,
+ * tweak words starting at offset 5*8, as addressed below)
+ *
+ * Register roles: I (r2) = round counter, S (r3) = subkey counter starting
+ * at 18, DATA0:DATA1 / CTX0:CTX1 = copies of the arguments, IDX0..IDX3 =
+ * scratch. r1 is used as a constant zero in every adc/sbc below.
+ */
+.global threefish256_dec
+threefish256_dec:
+ /* save Y and the low registers that this routine and
+ threefish_invmix_asm overwrite (push_range comes from
+ avr-asm-macros.S; presumably it pushes r2..r17) */
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ /* S lives in r3, which ldi cannot address, so load through r26 */
+ ldi r26, 18
+ mov S, r26
+1:
+ /* round loop head: a subkey is subtracted only when I % 4 == 0 */
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+2:
+ /* fetch the byte offsets of k[(S+0)%5] .. k[(S+3)%5] */
+ ldi r30, lo8(threefish256_slut5)
+ ldi r31, hi8(threefish256_slut5)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z
+ /* X walks over the data words; every sub_z_from_x8 call subtracts
+ the 64-bit value at Z from the one at X and advances X by 8 */
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall sub_z_from_x8
+
+ /* now the remaining key: X is one past word 3, step back to word 1
+ and subtract t[S%3] from word 1, t[(S+1)%3] from word 2 */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish256_slut3)
+ ldi r31, hi8(threefish256_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ adiw r30, 5*8 /* Z = start of the tweak words */
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ /* word 3 -= S: subtract S from the low byte, then propagate the
+ borrow through the remaining seven bytes (ld/st leave flags alone) */
+ ld r0, X
+ sub r0, S
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ /* the top byte must also use sbc: adc would add the pending borrow
+ instead of subtracting it and corrupt the most significant byte */
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ /* subkey 0 was the last one: restore registers and return */
+ tst S
+ brne 3f
+exit:
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+3:
+ dec S
+4:
+ /* now the permutation: swap word 1 (X) with word 3 (Z), byte by byte */
+ movw r26, DATA0
+ adiw r26, 8
+ movw r30, r26
+ adiw r30, 16
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ /* call mix: rotation constants for round I % 8; rc1 directly follows
+ rc0 (8 bytes apart) and is kept in IDX0 (r8), which
+ threefish_invmix_asm does not touch */
+ ldi r30, lo8(threefish256_rc0)
+ ldi r31, hi8(threefish256_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ movw r24, DATA0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_invmix_asm /* no rcall? */
+ inc I
+ rjmp 1b
+
+threefish256_slut5:
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
+ .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+threefish256_slut3:
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+
+threefish256_rc0: .byte 0x73, 0x13, 0x7b, 0x32, 0x72, 0x2b, 0x44, 0x1b
+threefish256_rc1: .byte 0x62, 0x52, 0x43, 0x24, 0x54, 0x6a, 0x34, 0x70
+
+/*
+ * sub_z_from_x8: 64-bit little-endian subtraction, (X) -= (Z).
+ * in: X = minuend/destination, Z = subtrahend
+ * out: X and Z both advanced by 8
+ * uses r0 and r1 as scratch; r1 is cleared again before returning
+ */
+sub_z_from_x8:
+ /* lowest byte: plain subtract, produces the first borrow */
+ ld r0, Z+
+ ld r1, X
+ sub r1, r0
+ st X+, r1
+ /* bytes 1..7: subtract with borrow (ld/st do not touch the flags) */
+ .rept 7
+ ld r0, Z+
+ ld r1, X
+ sbc r1, r0
+ st X+, r1
+ .endr
+ clr r1
+ ret
+
+
+
+
+
+
+
+
+
+
--- /dev/null
+/* threefish512_dec_asm.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-24
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/******************************************************************************/
+/*
+#define X(a) (((uint64_t*)data)[(a)])
+
+
+static
+void permute_inv8(void* data){
+ uint64_t t;
+ t = X(6);
+ X(6) = X(4);
+ X(4) = X(2);
+ X(2) = X(0);
+ X(0) = t;
+ t = X(7);
+ X(7) = X(3);
+ X(3) = t;
+}
+
+static
+void add_key_8(void* data, const threefish512_ctx_t* ctx, uint8_t s){
+ uint8_t i;
+ for(i=0; i<5; ++i){
+ X(i) -= ctx->k[(s+i)%9];
+ }
+ X(5) -= ctx->k[(s+5)%9] + ctx->t[s%3];
+ X(6) -= ctx->k[(s+6)%9] + ctx->t[(s+1)%3];
+ X(7) -= ctx->k[(s+7)%9] + s;
+}
+
+void threefish512_dec(void* data, const threefish512_ctx_t* ctx){
+ uint8_t i=0,s=18;
+ uint8_t r0[8] = {0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a};
+ uint8_t r1[8] = {0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a};
+ uint8_t r2[8] = {0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62};
+ uint8_t r3[8] = {0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b};
+ do{
+ if(i%4==0){
+ add_key_8(data, ctx, s);
+ --s;
+ }
+ permute_inv8(data);
+ threefish_invmix((uint8_t*)data + 0, r0[i%8]);
+ threefish_invmix((uint8_t*)data + 16, r1[i%8]);
+ threefish_invmix((uint8_t*)data + 32, r2[i%8]);
+ threefish_invmix((uint8_t*)data + 48, r3[i%8]);
+ ++i;
+ }while(i!=72);
+ add_key_8(data, ctx, s);
+}
+*/
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+IDX4 = 12
+IDX5 = 13
+IDX6 = 14
+IDX7 = 15
+/*
+ * threefish512_dec: decrypts one Threefish-512 block in place; assembly
+ * counterpart of the C reference shown in the comment above.
+ *
+ * param data: r24:r25 (pointer to the 8 x 64-bit block words)
+ * param ctx: r22:r23 (pointer to the context; 9 key words at offset 0,
+ * tweak words starting at offset 9*8, as addressed below)
+ *
+ * Register roles: I (r2) = round counter, S (r3) = subkey counter starting
+ * at 18, DATA0:DATA1 / CTX0:CTX1 = copies of the arguments, IDX0..IDX7 =
+ * scratch. r1 is used as a constant zero in every adc/sbc below.
+ */
+.global threefish512_dec
+threefish512_dec:
+ /* save Y and the low registers that this routine and
+ threefish_invmix_asm overwrite (push_range comes from
+ avr-asm-macros.S; presumably it pushes r2..r17) */
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ /* S lives in r3, which ldi cannot address, so load through r26 */
+ ldi r26, 18
+ mov S, r26
+1:
+ /* round loop head: a subkey is subtracted only when I % 4 == 0 */
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+2:
+ /* fetch the byte offsets of k[(S+0)%9] .. k[(S+7)%9] */
+ ldi r30, lo8(threefish512_slut9)
+ ldi r31, hi8(threefish512_slut9)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z+
+ lpm IDX4, Z+
+ lpm IDX5, Z+
+ lpm IDX6, Z+
+ lpm IDX7, Z
+ /* X walks over the data words; every sub_z_from_x8 call subtracts
+ the 64-bit value at Z from the one at X and advances X by 8 */
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX4
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX5
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX6
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, CTX0
+ add r30, IDX7
+ adc r31, r1
+ rcall sub_z_from_x8
+
+ /* now the remaining key: X is one past word 7, step back to word 5
+ and subtract t[S%3] from word 5, t[(S+1)%3] from word 6 */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish512_slut3)
+ ldi r31, hi8(threefish512_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ /* Z = ctx + 9*8, built in two steps because the adiw immediate
+ cannot exceed 63 */
+ adiw r30, 7*8 /* make Z pointing to (extended tweak) */
+ adiw r30, 2*8
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall sub_z_from_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall sub_z_from_x8
+ /* word 7 -= S: subtract S from the low byte, then propagate the
+ borrow through the remaining seven bytes (ld/st leave flags alone) */
+ ld r0, X
+ sub r0, S
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ ld r0, X
+ sbc r0, r1
+ st X+, r0
+ /* subkey 0 was the last one: restore registers and return */
+ tst S
+ brne 3f
+exit:
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+3:
+ dec S
+4:
+ /* now the permutation: the 4-cycle over words 0,2,4,6 of permute_inv8
+ is done as three swaps (0<->6, 6<->4, 2<->4), then 3<->7 */
+ movw r26, DATA0
+ movw r30, DATA0
+ adiw r30, 6*8
+ rcall xchg_zx8
+ movw r26, DATA0
+ adiw r26, 6*8
+ movw r30, DATA0
+ adiw r30, 4*8
+ rcall xchg_zx8
+ movw r26, DATA0
+ adiw r26, 2*8
+ movw r30, DATA0
+ adiw r30, 4*8
+ rcall xchg_zx8
+ movw r26, DATA0
+ adiw r26, 3*8
+ movw r30, DATA0
+ adiw r30, 7*8
+ rcall xchg_zx8
+ /* call mix: fetch the rotation constants rc0..rc3 for round I % 8;
+ the four tables are laid out back to back, 8 bytes apart.
+ rc1 stays in IDX0 and rc3 in IDX1 (r8/r9, untouched by
+ threefish_invmix_asm); rc2 is parked on the stack */
+ ldi r30, lo8(threefish512_rc0)
+ ldi r31, hi8(threefish512_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ adiw r30, 8
+ lpm IDX1, Z
+ push IDX1
+ adiw r30, 8
+ lpm IDX1, Z
+
+ /* one inverse mix per pair of words: data+0, +16, +32, +48 */
+ movw r24, DATA0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 32
+ pop r22
+ ;mov r22, IDX0
+ call threefish_invmix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 48
+ mov r22, IDX1
+ call threefish_invmix_asm /* no rcall? */
+ inc I
+ rjmp 1b
+
+threefish512_slut9:
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38
+ .byte 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30
+ .byte 0x38, 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28
+ .byte 0x30, 0x38, 0x40
+threefish512_slut3:
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+
+threefish512_rc0: .byte 0x41, 0x4b, 0x59, 0x41, 0x32, 0x42, 0x60, 0x5a
+threefish512_rc1: .byte 0x63, 0x32, 0x33, 0x61, 0x14, 0x2a, 0x24, 0x4a
+threefish512_rc2: .byte 0x59, 0x13, 0x51, 0x10, 0x72, 0x29, 0x53, 0x62
+threefish512_rc3: .byte 0x43, 0x11, 0x2a, 0x52, 0x19, 0x33, 0x49, 0x7b
+
+/*
+ * sub_z_from_x8: 64-bit little-endian subtraction, (X) -= (Z).
+ * in: X = minuend/destination, Z = subtrahend
+ * out: X and Z both advanced by 8
+ * uses r0 and r1 as scratch; r1 is cleared again before returning
+ */
+sub_z_from_x8:
+ /* lowest byte: plain subtract, produces the first borrow */
+ ld r0, Z+
+ ld r1, X
+ sub r1, r0
+ st X+, r1
+ /* bytes 1..7: subtract with borrow (ld/st do not touch the flags) */
+ .rept 7
+ ld r0, Z+
+ ld r1, X
+ sbc r1, r0
+ st X+, r1
+ .endr
+ clr r1
+ ret
+
+T0 = IDX0
+T1 = 0
+CNT = 24
+/*
+ * xchg_zx8: swaps the 8 bytes at X with the 8 bytes at Z.
+ * Both pointers end up advanced by 8; T0, T1 and CNT are overwritten.
+ */
+xchg_zx8:
+ ldi CNT, 8
+.Lxchg_zx8_next:
+ ld T0, X
+ ld T1, Z
+ st X+, T1
+ st Z+, T0
+ dec CNT
+ brne .Lxchg_zx8_next
+ ret
+
+
+
--- /dev/null
+/* threefish_invmix.S */
+/*
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-21
+ * \license GPLv3 or later
+ */
+
+#include "avr-asm-macros.S"
+
+/*
+#define X0 (((uint64_t*)data)[0])
+#define X1 (((uint64_t*)data)[1])
+void threefish_invmix(void* data, uint8_t rot){
+ uint64_t x;
+ x = X1;
+ x ^= X0;
+ X1 = ((x>>rot)|(x<<(64-rot)));
+ X0 -= X1;
+}
+*/
+A0 = 10
+A1 = 11
+A2 = 12
+A3 = 13
+A4 = 14
+A5 = 15
+A6 = 16
+A7 = 17
+
+B0 = 18
+B1 = 19
+B2 = 20
+B3 = 21
+B4 = 22
+B5 = 23
+B6 = 24
+B7 = 25
+vROT = 27
+/*
+ * threefish_invmix_asm: inverse MIX step on two adjacent 64-bit words,
+ * following the C reference above: B = X1 ^ X0, B is rotated, X0 -= B,
+ * then X0 and X1 (= B) are written back.
+ *
+ * param data: r24:r25 (pointer to X0, X1 follows at offset 8)
+ * param rot: r22 (encoded rotation: bits 6..4 = number of whole bytes
+ * to rotate right; bits 2..0 = number of single-bit
+ * rotations, to the left if bit 3 is set, else to the
+ * right)
+ *
+ * Overwrites r0, r10..r27, Y (r28:r29) and Z (r30:r31) without saving
+ * them; the callers in this library push r28, r29 and the low registers
+ * before calling. Relies on r1 being zero.
+ */
+
+.global threefish_invmix_asm
+threefish_invmix_asm:
+ movw r28, r24
+ mov vROT,r22
+ /* A = X0, B = X1 */
+ ldd A0, Y+ 0
+ ldd A1, Y+ 1
+ ldd A2, Y+ 2
+ ldd A3, Y+ 3
+ ldd A4, Y+ 4
+ ldd A5, Y+ 5
+ ldd A6, Y+ 6
+ ldd A7, Y+ 7
+ ldd B0, Y+ 8
+ ldd B1, Y+ 9
+ ldd B2, Y+10
+ ldd B3, Y+11
+ ldd B4, Y+12
+ ldd B5, Y+13
+ ldd B6, Y+14
+ ldd B7, Y+15
+ /* B ^= A */
+ eor B0, A0
+ eor B1, A1
+ eor B2, A2
+ eor B3, A3
+ eor B4, A4
+ eor B5, A5
+ eor B6, A6
+ eor B7, A7
+
+ /* byte rotation: the high nibble of rot (masked to 0..7) selects an
+ entry of the jump table; each routine jumps back to post_byterot */
+ mov r26, vROT
+ swap r26
+ andi r26, 0x07
+ ldi r30, pm_lo8(byte_rot_jmptable)
+ ldi r31, pm_hi8(byte_rot_jmptable)
+ add r30, r26
+ adc r31, r1
+ ijmp
+post_byterot:
+ /* bit rotation: bit 3 picks the direction (kept in T), the low three
+ bits are the count; both loops jump back to post_bitrot */
+ bst vROT, 3
+ andi vROT, 0x07
+ brts 1f
+ rjmp bit_rotr
+1: rjmp bit_rotl
+post_bitrot:
+ /* A -= B */
+ sub A0, B0
+ sbc A1, B1
+ sbc A2, B2
+ sbc A3, B3
+ sbc A4, B4
+ sbc A5, B5
+ sbc A6, B6
+ sbc A7, B7
+
+ /* write back X0 = A, X1 = B */
+ std Y+ 0, A0
+ std Y+ 1, A1
+ std Y+ 2, A2
+ std Y+ 3, A3
+ std Y+ 4, A4
+ std Y+ 5, A5
+ std Y+ 6, A6
+ std Y+ 7, A7
+ std Y+ 8, B0
+ std Y+ 9, B1
+ std Y+10, B2
+ std Y+11, B3
+ std Y+12, B4
+ std Y+13, B5
+ std Y+14, B6
+ std Y+15, B7
+exit:
+ ret
+
+/*
+ * Dispatch table for the byte rotation, entered via ijmp from
+ * threefish_invmix_asm with Z = table address + byte count (0..7); each
+ * entry is a single rjmp. Entry 0 needs no rotation and goes straight to
+ * post_byterot. The ninth entry cannot be selected by the 3-bit index
+ * computed in threefish_invmix_asm.
+ */
+byte_rot_jmptable:
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+ rjmp byte_rotr_1
+ rjmp byte_rotr_2
+ rjmp byte_rotr_3
+ rjmp byte_rotr_4
+ rjmp byte_rotr_5
+ rjmp byte_rotr_6
+ rjmp byte_rotr_7
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+
+
+
+; Rotate B7:B0 right by one byte (r0 is the temporary).
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 1 2 3 4 5 6 7 0
+;.global byte_rotr_1
+;.global byte_rotr_0
+byte_rotr_1: /* 10 words */
+ mov r0, B0
+ mov B0, B1
+ mov B1, B2
+ mov B2, B3
+ mov B3, B4
+ mov B4, B5
+ mov B5, B6
+ mov B6, B7
+ mov B7, r0
+; byte_rotr_1 falls through to here; byte_rotr_0 itself rotates nothing
+byte_rotr_0:
+ rjmp post_byterot
+
+; Rotate B7:B0 right by two bytes: two independent 4-cycles (even and odd
+; bytes), r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 2 3 4 5 6 7 0 1
+;.global byte_rotr_2
+byte_rotr_2: /* 11 words */
+ mov r0, B0
+ mov B0, B2
+ mov B2, B4
+ mov B4, B6
+ mov B6, r0
+ mov r0, B1
+ mov B1, B3
+ mov B3, B5
+ mov B5, B7
+ mov B7, r0
+ rjmp post_byterot
+
+; Rotate B7:B0 right by three bytes: one 8-cycle walked in steps of 3,
+; r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 3 4 5 6 7 0 1 2
+;.global byte_rotr_3
+byte_rotr_3: /* 10 words */
+ mov r0, B0
+ mov B0, B3
+ mov B3, B6
+ mov B6, B1
+ mov B1, B4
+ mov B4, B7
+ mov B7, B2
+ mov B2, B5
+ mov B5, r0
+ rjmp post_byterot
+
+; Rotate B7:B0 right by four bytes: swaps the low and high halves byte by
+; byte, r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 4 5 6 7 0 1 2 3
+;.global byte_rotr_4
+byte_rotr_4: /* 13 words */
+ mov r0, B0
+ mov B0, B4
+ mov B4, r0
+
+ mov r0, B1
+ mov B1, B5
+ mov B5, r0
+
+ mov r0, B2
+ mov B2, B6
+ mov B6, r0
+
+ mov r0, B3
+ mov B3, B7
+ mov B7, r0
+ rjmp post_byterot
+
+; Rotate B7:B0 right by five bytes: one 8-cycle walked in steps of 5,
+; r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 5 6 7 0 1 2 3 4
+;.global byte_rotr_5
+byte_rotr_5: /* 10 words */
+ mov r0, B0
+ mov B0, B5
+ mov B5, B2
+ mov B2, B7
+ mov B7, B4
+ mov B4, B1
+ mov B1, B6
+ mov B6, B3
+ mov B3, r0
+ rjmp post_byterot
+
+; Rotate B7:B0 right by six bytes: two independent 4-cycles (even and odd
+; bytes), r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 6 7 0 1 2 3 4 5
+;.global byte_rotr_6
+byte_rotr_6: /* 11 words */
+ mov r0, B0
+ mov B0, B6
+ mov B6, B4
+ mov B4, B2
+ mov B2, r0
+
+ mov r0, B1
+ mov B1, B7
+ mov B7, B5
+ mov B5, B3
+ mov B3, r0
+ rjmp post_byterot
+
+; Rotate B7:B0 right by seven bytes (every byte moves one position up,
+; B7 wraps to B0), r0 is the temporary.
+; destination byte: 0 1 2 3 4 5 6 7
+; source byte: 7 0 1 2 3 4 5 6
+;.global byte_rotr_7
+byte_rotr_7: /* 10 words */
+ mov r0, B7
+ mov B7, B6
+ mov B6, B5
+ mov B5, B4
+ mov B4, B3
+ mov B3, B2
+ mov B2, B1
+ mov B1, B0
+ mov B0, r0
+ rjmp post_byterot
+
+; bit_rotl: rotates the 64-bit value B7:B0 left by vROT bits, one bit per
+; pass, and continues at post_bitrot once vROT is zero.
+;.global bit_rotl
+bit_rotl:
+ tst vROT
+ brne 1f
+ rjmp post_bitrot
+; copy B7 to r0 so that "rol r0" moves B7's top bit into carry; the rol
+; chain then feeds it into bit 0 of B0 and shifts everything up by one
+1: mov r0, B7
+ rol r0
+ rol B0
+ rol B1
+ rol B2
+ rol B3
+ rol B4
+ rol B5
+ rol B6
+ rol B7
+ dec vROT
+ rjmp bit_rotl
+
+; bit_rotr: rotates the 64-bit value B7:B0 right by vROT bits, one bit per
+; pass, and continues at post_bitrot once vROT is zero.
+;.global bit_rotr
+bit_rotr:
+ tst vROT
+ brne 1f
+ rjmp post_bitrot
+; copy B0 to r0 so that "ror r0" moves B0's lowest bit into carry; the ror
+; chain then feeds it into bit 7 of B7 and shifts everything down by one
+1: mov r0, B0
+ ror r0
+ ror B7
+ ror B6
+ ror B5
+ ror B4
+ ror B3
+ ror B2
+ ror B1
+ ror B0
+ dec vROT
+ rjmp bit_rotr
+
+
; 0 1 2 3 4 5 6 7
; 1 2 3 4 5 6 7 0
-.global byte_rotr_1
-.global byte_rotr_0
+;.global byte_rotr_1
+;.global byte_rotr_0
byte_rotr_1: /* 10 words */
mov r0, B0
mov B0, B1
; 0 1 2 3 4 5 6 7
; 2 3 4 5 6 7 0 1
-.global byte_rotr_2
+;.global byte_rotr_2
byte_rotr_2: /* 11 words */
mov r0, B0
mov B0, B2
; 0 1 2 3 4 5 6 7
; 3 4 5 6 7 0 1 2
-.global byte_rotr_3
+;.global byte_rotr_3
byte_rotr_3: /* 10 words */
mov r0, B0
mov B0, B3
; 0 1 2 3 4 5 6 7
; 4 5 6 7 0 1 2 3
-.global byte_rotr_4
+;.global byte_rotr_4
byte_rotr_4: /* 13 words */
mov r0, B0
mov B0, B4
; 0 1 2 3 4 5 6 7
; 5 6 7 0 1 2 3 4
-.global byte_rotr_5
+;.global byte_rotr_5
byte_rotr_5: /* 10 words */
mov r0, B0
mov B0, B5
; 0 1 2 3 4 5 6 7
; 6 7 0 1 2 3 4 5
-.global byte_rotr_6
+;.global byte_rotr_6
byte_rotr_6: /* 11 words */
mov r0, B0
mov B0, B6
; 0 1 2 3 4 5 6 7
; 7 0 1 2 3 4 5 6
-.global byte_rotr_7
+;.global byte_rotr_7
byte_rotr_7: /* 10 words */
mov r0, B7
mov B7, B6
mov B0, r0
rjmp post_byterot
-.global bit_rotl
+;.global bit_rotl
bit_rotl:
tst vROT
brne 1f
dec vROT
rjmp bit_rotl
-.global bit_rotr
+;.global bit_rotr
bit_rotr:
tst vROT
brne 1f