From: bg Date: Wed, 25 Mar 2009 01:19:17 +0000 (+0000) Subject: threefish encryption in 512 and 1024 bit in assembler X-Git-Url: https://git.cryptolib.org/?a=commitdiff_plain;h=f215d8ed649fd5bf5f85eb34f851a1539302013d;p=avr-crypto-lib.git threefish encryption in 512 and 1024 bit in assembler --- diff --git a/host/optimize_shift.rb b/host/optimize_shift.rb new file mode 100644 index 0000000..e667697 --- /dev/null +++ b/host/optimize_shift.rb @@ -0,0 +1,82 @@ +#!/usr/bin/ruby +# optimize_shift.rb +=begin + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+=end + +shift_values = [ 5, 36, 13, 58, 26, 53, 11, 59, + 56, 28, 46, 44, 20, 35, 42, 50, + + 38, 48, 34, 26, 33, 39, 29, 33, + 30, 20, 14, 12, 49, 27, 26, 51, + 50, 43, 15, 58, 8, 41, 11, 39, + 53, 31, 27, 7, 42, 14, 9, 35, + + 55, 25, 33, 34, 28, 17, 58, 47, + 43, 25, 8, 43, 7, 6, 7, 49, + 37, 46, 18, 25, 47, 18, 32, 27, + 40, 13, 57, 60, 48, 25, 45, 58, + 16, 14, 21, 44, 51, 43, 19, 37, + 22, 13, 12, 9, 9, 42, 18, 48, + 38, 52, 32, 59, 35, 40, 2, 53, + 12, 57, 54, 34, 41, 15, 56, 56 ] + +def transform_shift(value) + byteshift = (value+3)/8 + singleshift = value%8 + if singleshift>4 + # byteshift += 1 + singleshift -= 8 + end + return [singleshift, byteshift] +end + +def transform_singleshift(value) + if(value>=0) + return value + end + return 0x08+(value*-1) +end + +bs_hist = Hash.new +bs_hist.default = 0 +ss_hist = Hash.new +ss_hist.default = 0 +shift_values.each{|v| + + a = transform_shift(v) + printf("%2d = %2d * 8 %+2d\n", v, a[1], a[0]) + bs_hist[a[1]] += 1 + ss_hist[a[0]] += 1 +} + +puts("byteshift histogram:") +for i in 0..7 + printf("%d: %4d\n", i, bs_hist[i]) + end + +puts("singleshift histogram:") +for i in -3..4 + printf("%+d: %4d\n", i, ss_hist[i]) + end + +(0..shift_values.length-1).each{|i| + a = transform_shift(shift_values[i]) + a[0] = transform_singleshift(a[0]) + printf("0x%01x%01x, ", a[1], a[0]) + puts("") if (i%8==7) +} diff --git a/host/threefish_helper.rb b/host/threefish_helper.rb new file mode 100644 index 0000000..b15a26b --- /dev/null +++ b/host/threefish_helper.rb @@ -0,0 +1,28 @@ +#!/usr/bin/ruby +# + +(0..19).each { |s| + printf("0x%s%s, ", ((s+0)%5).to_s,((s+1)%5).to_s) + printf("0x%s%s, ", ((s+2)%5).to_s,((s+3)%5).to_s) +} + +puts("\n or (5)\n") +(0..19+3).each { |s| + printf("0x%02x, ", ((s%5)*8)) +} + +puts("\n or (9)\n") +(0..19+7).each { |s| + printf("0x%02x, ", ((s%9)*8)) +} + +puts("\n or (17)\n") +(0..21+15).each { |s| + printf("0x%02x, ", ((s%17)*8)) +} + + +puts("\n (3)\n") +(0..24).each { |s| + printf("0x%02x, ", 
((s%3)*8)) +} diff --git a/mkfiles/threefish.mk b/mkfiles/threefish.mk index fa58d23..d46769e 100644 --- a/mkfiles/threefish.mk +++ b/mkfiles/threefish.mk @@ -5,9 +5,9 @@ ALGO_NAME := THREEFISH BLOCK_CIPHERS += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc.o threefish1024_enc.o\ +$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc_asm.o threefish1024_enc_asm.o\ threefish_mix.o threefish_mix_4c.o threefish_invmix_c.o\ - threefish256_dec.o + threefish256_dec.o threefish512_dec.o threefish1024_dec.o $(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \ nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := test nessie diff --git a/test_src/main-threefish-test.c b/test_src/main-threefish-test.c index 1dba3ac..f5d10eb 100644 --- a/test_src/main-threefish-test.c +++ b/test_src/main-threefish-test.c @@ -47,7 +47,7 @@ void threefish256_dummy_init(const uint8_t* key, uint16_t keysize_b, void* ctx){ threefish256_init(key, null, ctx); } -void testrun_nessie_threefish(void){ +void testrun_nessie_threefish256(void){ nessie_bc_ctx.keysize_b = 256; nessie_bc_ctx.blocksize_B = 32; nessie_bc_ctx.ctx_size_B = sizeof(threefish256_ctx_t); @@ -60,6 +60,50 @@ void testrun_nessie_threefish(void){ nessie_bc_run(); } +void threefish512_dummy_init(const uint8_t* key, uint16_t keysize_b, void* ctx){ + uint8_t null[16]; + memset(null, 0, 16); + threefish512_init(key, null, ctx); +} + +void testrun_nessie_threefish512(void){ + nessie_bc_ctx.keysize_b = 512; + nessie_bc_ctx.blocksize_B = 64; + nessie_bc_ctx.ctx_size_B = sizeof(threefish512_ctx_t); + nessie_bc_ctx.name = "Threefish512"; + nessie_bc_ctx.cipher_genctx = threefish512_dummy_init; + nessie_bc_ctx.cipher_enc = (nessie_bc_enc_fpt)threefish512_enc; + nessie_bc_ctx.cipher_dec = (nessie_bc_dec_fpt)threefish512_dec; + nessie_bc_ctx.cipher_free = NULL; + + nessie_bc_run(); +} + +void threefish1024_dummy_init(const 
uint8_t* key, uint16_t keysize_b, void* ctx){ + uint8_t null[16]; + memset(null, 0, 16); + threefish1024_init(key, null, ctx); +} + +void testrun_nessie_threefish1024(void){ + nessie_bc_ctx.keysize_b = 1024; + nessie_bc_ctx.blocksize_B = 128; + nessie_bc_ctx.ctx_size_B = sizeof(threefish1024_ctx_t); + nessie_bc_ctx.name = "Threefish1024"; + nessie_bc_ctx.cipher_genctx = threefish1024_dummy_init; + nessie_bc_ctx.cipher_enc = (nessie_bc_enc_fpt)threefish1024_enc; + nessie_bc_ctx.cipher_dec = (nessie_bc_dec_fpt)threefish1024_dec; + nessie_bc_ctx.cipher_free = NULL; + + nessie_bc_run(); +} + +void testrun_nessie_threefish(void){ + testrun_nessie_threefish256(); + testrun_nessie_threefish512(); + testrun_nessie_threefish1024(); +} + void testrun_stdtest_threefish256(void){ uint8_t key[32], data[32]; uint8_t tweak[16]; @@ -81,16 +125,6 @@ void testrun_stdtest_threefish256(void){ threefish256_enc(data, &ctx); cli_putstr_P(PSTR("\r\ncipher: ")); cli_hexdump(data, 32); - /* - cli_hexdump_rev(data, 8); - cli_putc(' '); - cli_hexdump_rev(data+8, 8); - cli_putc(' '); - cli_hexdump_rev(data+16, 8); - cli_putc(' '); - cli_hexdump_rev(data+24, 8); - cli_putc(' '); - */ /* second test */ for(i=0; i<32; ++i){ key[i] = 0x10+i; @@ -203,7 +237,7 @@ void testrun_stdtest_threefish1024(void){ cli_hexdump(data+64, 32); cli_putstr_P(PSTR("\r\n ")); cli_hexdump(data+96, 32); - + for(i=0; i<128; ++i){ key[i] = 0x10+i; data[i] = 0xFF-i; @@ -239,7 +273,7 @@ void testrun_stdtest_threefish1024(void){ cli_hexdump(data+64, 32); cli_putstr_P(PSTR("\r\n ")); cli_hexdump(data+96, 32); - } +} void testrun_stdtest_threefish(void){ @@ -372,6 +406,9 @@ void init_test(void){ const char nessie_str[] PROGMEM = "nessie"; const char test_str[] PROGMEM = "test"; +const char test256_str[] PROGMEM = "test256"; +const char test512_str[] PROGMEM = "test512"; +const char test1024_str[] PROGMEM = "test1024"; const char inittest_str[] PROGMEM = "inittest"; const char performance_str[] PROGMEM = "performance"; const 
char echo_str[] PROGMEM = "echo"; @@ -379,6 +416,9 @@ const char echo_str[] PROGMEM = "echo"; cmdlist_entry_t cmdlist[] PROGMEM = { { nessie_str, NULL, testrun_nessie_threefish}, { test_str, NULL, testrun_stdtest_threefish}, + { test256_str, NULL, testrun_stdtest_threefish256}, + { test512_str, NULL, testrun_stdtest_threefish512}, + { test1024_str, NULL, testrun_stdtest_threefish1024}, { inittest_str, NULL, init_test}, { performance_str, NULL, testrun_performance_threefish}, { echo_str, (void*)1, (void_fpt)echo_ctrl}, diff --git a/threefish.h b/threefish.h index eaa369c..fe3ce78 100644 --- a/threefish.h +++ b/threefish.h @@ -66,5 +66,7 @@ void threefish512_enc(void* data, const threefish512_ctx_t* ctx); void threefish1024_enc(void* data, const threefish1024_ctx_t* ctx); void threefish256_dec(void* data, const threefish256_ctx_t* ctx); +void threefish512_dec(void* data, const threefish512_ctx_t* ctx); +void threefish1024_dec(void* data, const threefish1024_ctx_t* ctx); #endif /* THREEFISH_H_ */ diff --git a/threefish1024_dec.c b/threefish1024_dec.c new file mode 100644 index 0000000..edb46d4 --- /dev/null +++ b/threefish1024_dec.c @@ -0,0 +1,96 @@ +/* threefish1024_dec.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-12 + * \license GPLv3 or later + * + * + * + */ + +#include +#include +#include "threefish.h" + +#define X(a) (((uint64_t*)data)[(a)]) + +static +void permute_inv16(void* data){ + uint64_t t; + t = X(15); + X(15) = X(7); + X(7) = X(9); + X(9) = X(1); + X(1) = t; + t = X(11); + X(11) = X(5); + X(5) = X(13); + X(13) = X(3); + X(3) = t; + t = X(4); + X(4) = X(6); + X(6) = t; + t = X(14); + X(14) = X(12); + X(12) = X(10); + X(10) = X(8); + X(8) = t; +} + +static +void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){ + uint8_t i; + for(i=0; i<13; ++i){ + X(i) -= ctx->k[(s+i)%17]; + } + X(13) -= ctx->k[(s+13)%17] + ctx->t[s%3]; + X(14) -= ctx->k[(s+14)%17] + ctx->t[(s+1)%3]; + X(15) -= ctx->k[(s+15)%17] + s; +} + +void threefish1024_dec(void* data, const threefish1024_ctx_t* ctx){ + uint8_t i=0,s=20; + uint8_t r0[8] = {47, 58, 17, 28, 34, 33, 25, 55}; + uint8_t r1[8] = {49, 7, 6, 7, 43, 8, 25, 43}; + uint8_t r2[8] = {27, 32, 18, 47, 25, 18, 46, 37}; + uint8_t r3[8] = {58, 45, 25, 48, 60, 57, 13, 40}; + uint8_t r4[8] = {37, 19, 43, 51, 44, 21, 14, 16}; + uint8_t r5[8] = {48, 18, 42, 9, 9, 12, 13, 22}; + uint8_t r6[8] = {53, 2, 40, 35, 59, 32, 52, 38}; + uint8_t r7[8] = {56, 56, 15, 41, 34, 54, 57, 12}; + do{ + if(i%4==0){ + add_key_16(data, ctx, s); + --s; + } + permute_inv16(data); + threefish_invmix((uint8_t*)data + 0, r0[i%8]); + threefish_invmix((uint8_t*)data + 16, r1[i%8]); + threefish_invmix((uint8_t*)data + 32, r2[i%8]); + threefish_invmix((uint8_t*)data + 48, r3[i%8]); + threefish_invmix((uint8_t*)data + 64, r4[i%8]); + threefish_invmix((uint8_t*)data + 80, r5[i%8]); + threefish_invmix((uint8_t*)data + 96, r6[i%8]); + threefish_invmix((uint8_t*)data +112, r7[i%8]); + ++i; + }while(i!=80); + add_key_16(data, ctx, s); +} diff --git a/threefish1024_enc_asm.S b/threefish1024_enc_asm.S new file mode 100644 index 0000000..bb75f04 --- /dev/null +++ b/threefish1024_enc_asm.S 
@@ -0,0 +1,605 @@ +/* threefish1024_enc_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-24 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +A0 = 14 +A1 = 15 +A2 = 16 +A3 = 17 +A4 = 18 +A5 = 19 +A6 = 20 +A7 = 21 +/* +#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * / + +#define K(s) (((uint64_t*)key)[(s)]) +#define T(s) (((uint64_t*)tweak)[(s)]) +void threefish1024_init(const void* key, const void* tweak, threefish1024_ctx_t* ctx){ + memcpy(ctx->k, key, 16*8); + memcpy(ctx->t, tweak, 2*8); + uint8_t i; + ctx->k[16] = THREEFISH_KEY_CONST; + for(i=0; i<16; ++i){ + ctx->k[16] ^= K(i); + } + ctx->t[2] = T(0) ^ T(1); +} +*/ +/* + * param key: r24:r25 + * param tweak: r22:r23 + * param ctx: r20:r21 + */ +.global threefish1024_init +threefish1024_init: + push_range 14, 17 + movw r30, r20 + movw r26, r24 + ldi r24, 16 + ldi A7, 0x55 + mov A6, A7 + movw A4, A6 + movw A2, A6 + movw A0, A6 +1: + ld r0, X+ + st Z+, r0 + eor A0, r0 + ld r0, X+ + st Z+, r0 + eor A1, r0 + ld r0, X+ + st Z+, r0 + eor A2, r0 + ld r0, X+ + st Z+, r0 + eor A3, r0 + ld r0, X+ + st Z+, r0 + eor A4, r0 + ld r0, X+ + st Z+, r0 + eor A5, r0 + ld r0, X+ + st Z+, r0 + eor A6, r0 + ld r0, X+ + st Z+, 
r0 + eor A6, r0 + ld r0, X+ + st Z+, r0 + eor A7, r0 + dec r24 + brne 1b + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + /* now the tweak */ + movw r26, r22 + ld A0, X+ + ld A1, X+ + ld A2, X+ + ld A3, X+ + ld A4, X+ + ld A5, X+ + ld A6, X+ + ld A7, X+ + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + ld r0, X+ + eor A0, r0 + st Z+, r0 + ld r0, X+ + eor A1, r0 + st Z+, r0 + ld r0, X+ + eor A2, r0 + st Z+, r0 + ld r0, X+ + eor A3, r0 + st Z+, r0 + ld r0, X+ + eor A4, r0 + st Z+, r0 + ld r0, X+ + eor A5, r0 + st Z+, r0 + ld r0, X+ + eor A6, r0 + st Z+, r0 + ld r0, X+ + eor A7, r0 + st Z+, r0 + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + pop_range 14, 17 + ret + +/******************************************************************************/ +/* +#define X(a) (((uint64_t*)data)[(a)]) +void permute_16(void* data){ + uint64_t t; + t = X(1); + X(1) = X(9); + X(9) = X(7); + X(7) = X(15); + X(15) = t; + t = X(3); + X(3) = X(13); + X(13) = X(5); + X(5) = X(11); + X(11) = t; + t = X(4); + X(4) = X(6); + X(6) = t; + t = X(8); + X(8) = X(10); + X(10) = X(12); + X(12) = X(14); + X(14) = t; +} +void add_key_16(void* data, const threefish1024_ctx_t* ctx, uint8_t s){ + uint8_t i; + for(i=0; i<13; ++i){ + X(i) += ctx->k[(s+i)%17]; + } + X(13) += ctx->k[(s+13)%17] + ctx->t[s%3]; + X(14) += ctx->k[(s+14)%17] + ctx->t[(s+1)%3]; + X(15) += ctx->k[(s+15)%17] + s; +} +void threefish1024_enc(void* data, const threefish1024_ctx_t* ctx){ + uint8_t i=0,s=0; + uint8_t r0[8] = {55, 25, 33, 34, 28, 17, 58, 47}; + uint8_t r1[8] = {43, 25, 8, 43, 7, 6, 7, 49}; + uint8_t r2[8] = {37, 46, 18, 25, 47, 18, 32, 27}; + uint8_t r3[8] = {40, 13, 57, 60, 48, 25, 45, 58}; + uint8_t r4[8] = {16, 14, 21, 44, 51, 43, 19, 37}; + uint8_t r5[8] = {22, 13, 12, 9, 9, 42, 18, 48}; + uint8_t r6[8] = {38, 52, 32, 59, 35, 40, 2, 53}; + uint8_t r7[8] = {12, 57, 54, 34, 
41, 15, 56, 56}; + do{ + if(i%4==0){ + add_key_16(data, ctx, s); + ++s; + } + threefish_mix((uint8_t*)data + 0, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); + threefish_mix((uint8_t*)data + 32, r2[i%8]); + threefish_mix((uint8_t*)data + 48, r3[i%8]); + threefish_mix((uint8_t*)data + 64, r4[i%8]); + threefish_mix((uint8_t*)data + 80, r5[i%8]); + threefish_mix((uint8_t*)data + 96, r6[i%8]); + threefish_mix((uint8_t*)data +112, r7[i%8]); + permute_16(data); + ++i; + }while(i!=80); + add_key_16(data, ctx, s); +} +*/ +I = 2 +S = 3 +DATA0 = 4 +DATA1 = 5 +CTX0 = 6 +CTX1 = 7 +IDX0 = 8 +IDX1 = 9 +IDX2 = 10 +IDX3 = 11 +IDX4 = 12 +IDX5 = 13 +IDX6 = 14 +IDX7 = 15 + +/* + * param data: r24:r25 + * param ctx: r22:r23 + */ +.global threefish1024_enc +threefish1024_enc: + push r28 + push r29 + push_range 2, 17 + movw DATA0, r24 + movw CTX0, r22 + clr I + clr S +1: + mov r30, I + andi r30, 0x03 + breq 2f + rjmp 4f +2: + ldi r30, lo8(threefish1024_slut17) + ldi r31, hi8(threefish1024_slut17) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z+ + lpm IDX2, Z+ + lpm IDX3, Z+ + lpm IDX4, Z+ + lpm IDX5, Z+ + lpm IDX6, Z+ + lpm IDX7, Z + movw r30, CTX0 + movw r26, DATA0 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX2 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX3 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX4 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX5 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX6 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX7 + adc r31, r1 + rcall add_z_to_x8 + /* second half */ + ldi r30, lo8(threefish1024_slut17) + ldi r31, hi8(threefish1024_slut17) + add r30, S + adc r31, r1 + adiw r30, 8 + lpm IDX0, Z+ + lpm IDX1, Z+ + lpm IDX2, Z+ + lpm IDX3, Z+ + lpm IDX4, Z+ + lpm IDX5, Z+ + lpm IDX6, Z+ + lpm IDX7, Z + movw r30, CTX0 + add r30, IDX0 + adc r31, 
r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX2 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX3 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX4 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX5 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX6 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX7 + adc r31, r1 + rcall add_z_to_x8 + /* now the remaining key */ + sbiw r26, 3*8 + ldi r30, lo8(threefish1024_slut3) + ldi r31, hi8(threefish1024_slut3) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z + movw r30, CTX0 + adiw r30, 7*8 /* make Z pointing to (extended tweak) */ + adiw r30, 7*8 + adiw r30, 3*8 + movw IDX2, r30 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, IDX2 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + ld r0, X + add r0, S + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + inc S + mov r26, S + cpi r26, 21 + brmi 4f +exit: + pop_range 2, 17 + pop r29 + pop r28 + ret +4: + /* call mix */ + ldi r30, lo8(threefish1024_rc0) + ldi r31, hi8(threefish1024_rc0) + mov r26, I + andi r26, 0x07 + add r30, r26 + adc r31, r1 + lpm r22, Z + adiw r30, 8 + lpm IDX0, Z + adiw r30, 8 + lpm IDX1, Z + adiw r30, 8 + lpm IDX2, Z + adiw r30, 8 + lpm IDX3, Z + adiw r30, 8 + lpm IDX4, Z + adiw r30, 8 + lpm IDX5, Z + adiw r30, 8 + lpm IDX6, Z + push IDX6 + push IDX5 + push IDX4 + push IDX3 + push IDX2 + + movw r24, DATA0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 16 + mov r22, IDX0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 32 + mov r22, IDX1 + call threefish_mix_asm /* no rcall? 
*/ + movw r24, DATA0 + adiw r24, 48 + pop r22 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 63 + adiw r24, 1 + pop r22 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 63 + adiw r24, 17 + pop r22 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 63 + adiw r24, 33 + pop r22 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 63 + adiw r24, 49 + pop r22 + call threefish_mix_asm /* no rcall? */ + /* now the permutation */ + movw r26, DATA0 /* X1 <-> X15 */ + adiw r26, 1*8 + movw r30, DATA0 + adiw r30, 7*8+4 + adiw r30, 7*8+4 + rcall xchg_zx8 + movw r26, DATA0 /* X1 <-> X9 */ + adiw r26, 1*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 2*8 + rcall xchg_zx8 + movw r26, DATA0 /* X9 <-> X7 */ + adiw r26, 7*8 + adiw r26, 2*8 + movw r30, DATA0 + adiw r30, 7*8 + rcall xchg_zx8 + /* --- */ + movw r26, DATA0 /* X3 <-> X11 */ + adiw r26, 3*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 4*8 + rcall xchg_zx8 + movw r26, DATA0 /* X3 <-> X13 */ + adiw r26, 3*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 6*8 + rcall xchg_zx8 + movw r26, DATA0 /* X13 <-> X5 */ + adiw r26, 7*8 + adiw r26, 6*8 + movw r30, DATA0 + adiw r30, 5*8 + rcall xchg_zx8 + /* --- */ + movw r26, DATA0 /* X8 <-> X14 */ + adiw r26, 7*8 + adiw r26, 1*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 7*8 + rcall xchg_zx8 + movw r26, DATA0 /* X8 <-> X10 */ + adiw r26, 7*8 + adiw r26, 1*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 3*8 + rcall xchg_zx8 + movw r26, DATA0 /* X10 <-> X12 */ + adiw r26, 7*8 + adiw r26, 3*8 + movw r30, DATA0 + adiw r30, 7*8 + adiw r30, 5*8 + rcall xchg_zx8 + /* --- */ + movw r26, DATA0 /* X4 <-> X6 */ + adiw r26, 4*8 + movw r30, DATA0 + adiw r30, 6*8 + rcall xchg_zx8 + + inc I +; mov r26, I +; cpi r26, 5 +; brne 9f +; rjmp exit +9: + rjmp 1b + +threefish1024_slut17: + .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38 + .byte 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 + .byte 0x80, 0x00, 0x08, 
0x10, 0x18, 0x20, 0x28, 0x30 + .byte 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70 + .byte 0x78, 0x80, 0x00, 0x08, 0x10 +threefish1024_slut3: + .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 + .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00 + .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10 + .byte 0x00 +threefish1024_rc0: .byte 0x79, 0x31, 0x41, 0x42, 0x34, 0x21, 0x72, 0x69 +threefish1024_rc1: .byte 0x53, 0x31, 0x10, 0x53, 0x19, 0x1a, 0x19, 0x61 +threefish1024_rc2: .byte 0x5b, 0x6a, 0x22, 0x31, 0x69, 0x22, 0x40, 0x33 +threefish1024_rc3: .byte 0x50, 0x2b, 0x71, 0x74, 0x60, 0x31, 0x6b, 0x72 +threefish1024_rc4: .byte 0x20, 0x2a, 0x3b, 0x54, 0x63, 0x53, 0x23, 0x5b +threefish1024_rc5: .byte 0x3a, 0x2b, 0x14, 0x11, 0x11, 0x52, 0x22, 0x60 +threefish1024_rc6: .byte 0x5a, 0x64, 0x40, 0x73, 0x43, 0x50, 0x02, 0x7b +threefish1024_rc7: .byte 0x14, 0x71, 0x7a, 0x42, 0x51, 0x29, 0x70, 0x70 + +add_z_to_x8: + ld r0, Z+ + ld r1, X + add r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + clr r1 + ret + +T0 = IDX0 +T1 = 0 +CNT = 24 +xchg_zx8: + ldi CNT, 8 +1: ld T0, X + ld T1, Z + st X+, T1 + st Z+, T0 + dec CNT + brne 1b + ret + + + diff --git a/threefish256_enc_asm.S b/threefish256_enc_asm.S index 7fe3471..ccd457d 100644 --- a/threefish256_enc_asm.S +++ b/threefish256_enc_asm.S @@ -1,4 +1,4 @@ -/* threefish_mix.S */ +/* threefish256_enc_asm.S */ /* This file is part of the AVR-Crypto-Lib. 
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) @@ -218,8 +218,7 @@ threefish256_enc: 2: ldi r30, lo8(threefish256_slut5) ldi r31, hi8(threefish256_slut5) - mov r26, S - add r30, r26 + add r30, S adc r31, r1 lpm IDX0, Z+ lpm IDX1, Z+ diff --git a/threefish512_dec.c b/threefish512_dec.c new file mode 100644 index 0000000..c535d33 --- /dev/null +++ b/threefish512_dec.c @@ -0,0 +1,81 @@ +/* threefish512_dec.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-22 + * \license GPLv3 or later + * + * + * + */ + +#include +#include +#include "threefish.h" + + +#define X(a) (((uint64_t*)data)[(a)]) + + +static +void permute_inv8(void* data){ + uint64_t t; + t = X(6); + X(6) = X(4); + X(4) = X(2); + X(2) = X(0); + X(0) = t; + t = X(7); + X(7) = X(3); + X(3) = t; +} + +static +void add_key_8(void* data, const threefish512_ctx_t* ctx, uint8_t s){ + uint8_t i; + for(i=0; i<5; ++i){ + X(i) -= ctx->k[(s+i)%9]; + } + X(5) -= ctx->k[(s+5)%9] + ctx->t[s%3]; + X(6) -= ctx->k[(s+6)%9] + ctx->t[(s+1)%3]; + X(7) -= ctx->k[(s+7)%9] + s; +} + +void threefish512_dec(void* data, const threefish512_ctx_t* ctx){ + uint8_t i=0,s=18; + uint8_t r0[8] = {33, 29, 39, 33, 26, 34, 48, 38}; + uint8_t r1[8] = {51, 26, 27, 49, 12, 14, 20, 30}; + uint8_t r2[8] = {39, 11, 41, 8, 58, 15, 43, 50}; + uint8_t r3[8] = {35, 9, 14, 42, 7, 27, 31, 53}; + do{ + if(i%4==0){ + add_key_8(data, ctx, s); + --s; + } + permute_inv8(data); + threefish_invmix((uint8_t*)data + 0, r0[i%8]); + threefish_invmix((uint8_t*)data + 16, r1[i%8]); + threefish_invmix((uint8_t*)data + 32, r2[i%8]); + threefish_invmix((uint8_t*)data + 48, r3[i%8]); + ++i; + }while(i!=72); + add_key_8(data, ctx, s); +} + diff --git a/threefish512_enc_asm.S b/threefish512_enc_asm.S new file mode 100644 index 0000000..bb6ff2d --- /dev/null +++ b/threefish512_enc_asm.S @@ -0,0 +1,452 @@ +/* threefish512_enc_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-24 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +A0 = 14 +A1 = 15 +A2 = 16 +A3 = 17 +A4 = 18 +A5 = 19 +A6 = 20 +A7 = 21 +/* +#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * / + +#define K(s) (((uint64_t*)key)[(s)]) +#define T(s) (((uint64_t*)tweak)[(s)]) +void threefish512_init(const void* key, const void* tweak, threefish512_ctx_t* ctx){ + memcpy(ctx->k, key, 8*8); + memcpy(ctx->t, tweak, 2*8); + uint8_t i; + ctx->k[8] = THREEFISH_KEY_CONST; + for(i=0; i<8; ++i){ + ctx->k[8] ^= K(i); + } + ctx->t[2] = T(0) ^ T(1); +} +*/ +/* + * param key: r24:r25 + * param tweak: r22:r23 + * param ctx: r20:r21 + */ +.global threefish512_init +threefish512_init: + push_range 14, 17 + movw r30, r20 + movw r26, r24 + ldi r24, 8 + ldi A7, 0x55 + mov A6, A7 + movw A4, A6 + movw A2, A6 + movw A0, A6 +1: + ld r0, X+ + st Z+, r0 + eor A0, r0 + ld r0, X+ + st Z+, r0 + eor A1, r0 + ld r0, X+ + st Z+, r0 + eor A2, r0 + ld r0, X+ + st Z+, r0 + eor A3, r0 + ld r0, X+ + st Z+, r0 + eor A4, r0 + ld r0, X+ + st Z+, r0 + eor A5, r0 + ld r0, X+ + st Z+, r0 + eor A6, r0 + ld r0, X+ + st Z+, r0 + eor A7, r0 + dec r24 + brne 1b + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + /* now the tweak */ + movw r26, r22 + ld A0, X+ + ld A1, X+ + ld A2, X+ + ld A3, X+ + ld A4, X+ + ld A5, X+ + ld A6, X+ + ld A7, X+ + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + ld 
r0, X+ + eor A0, r0 + st Z+, r0 + ld r0, X+ + eor A1, r0 + st Z+, r0 + ld r0, X+ + eor A2, r0 + st Z+, r0 + ld r0, X+ + eor A3, r0 + st Z+, r0 + ld r0, X+ + eor A4, r0 + st Z+, r0 + ld r0, X+ + eor A5, r0 + st Z+, r0 + ld r0, X+ + eor A6, r0 + st Z+, r0 + ld r0, X+ + eor A7, r0 + st Z+, r0 + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + pop_range 14, 17 + ret + +/******************************************************************************/ +/* +#define X(a) (((uint64_t*)data)[(a)]) +void permute_8(void* data){ + uint64_t t; + t = X(0); + X(0) = X(2); + X(2) = X(4); + X(4) = X(6); + X(6) = t; + t = X(3); + X(3) = X(7); + X(7) = t; +} +void add_key_8(void* data, const threefish512_ctx_t* ctx, uint8_t s){ + uint8_t i; + for(i=0; i<5; ++i){ + X(i) += ctx->k[(s+i)%9]; + } + X(5) += ctx->k[(s+5)%9] + ctx->t[s%3]; + X(6) += ctx->k[(s+6)%9] + ctx->t[(s+1)%3]; + X(7) += ctx->k[(s+7)%9] + s; +} +void threefish512_enc(void* data, const threefish512_ctx_t* ctx){ + uint8_t i=0,s=0; + uint8_t r0[8] = {38, 48, 34, 26, 33, 39, 29, 33}; + uint8_t r1[8] = {30, 20, 14, 12, 49, 27, 26, 51}; + uint8_t r2[8] = {50, 43, 15, 58, 8, 41, 11, 39}; + uint8_t r3[8] = {53, 31, 27, 7, 42, 14, 9, 35}; + do{ + if(i%4==0){ + add_key_8(data, ctx, s); + ++s; + } + threefish_mix((uint8_t*)data + 0, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); + threefish_mix((uint8_t*)data + 32, r2[i%8]); + threefish_mix((uint8_t*)data + 48, r3[i%8]); + permute_8(data); + ++i; + }while(i!=72); + add_key_8(data, ctx, s); +} + +*/ +I = 2 +S = 3 +DATA0 = 4 +DATA1 = 5 +CTX0 = 6 +CTX1 = 7 +IDX0 = 8 +IDX1 = 9 +IDX2 = 10 +IDX3 = 11 +IDX4 = 12 +IDX5 = 13 +IDX6 = 14 +IDX7 = 15 +/* + * param data: r24:r25 + * param ctx: r22:r23 + */ +.global threefish512_enc +threefish512_enc: + push r28 + push r29 + push_range 2, 17 + movw DATA0, r24 + movw CTX0, r22 + clr I + clr S +1: + mov r30, I + andi r30, 0x03 + breq 2f + rjmp 4f +2: + ldi r30, lo8(threefish512_slut9) + ldi 
r31, hi8(threefish512_slut9) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z+ + lpm IDX2, Z+ + lpm IDX3, Z+ + lpm IDX4, Z+ + lpm IDX5, Z+ + lpm IDX6, Z+ + lpm IDX7, Z + movw r30, CTX0 + movw r26, DATA0 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX2 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX3 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX4 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX5 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX6 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX7 + adc r31, r1 + rcall add_z_to_x8 + + /* now the remaining key */ + sbiw r26, 3*8 + ldi r30, lo8(threefish512_slut3) + ldi r31, hi8(threefish512_slut3) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z + movw r30, CTX0 + adiw r30, 7*8 /* make Z pointing to (extended tweak) */ + adiw r30, 2*8 + movw IDX2, r30 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, IDX2 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + ld r0, X + add r0, S + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + inc S + mov r26, S + cpi r26, 19 + brmi 4f +exit: + pop_range 2, 17 + pop r29 + pop r28 + ret +4: + /* call mix */ + ldi r30, lo8(threefish512_rc0) + ldi r31, hi8(threefish512_rc0) + mov r26, I + andi r26, 0x07 + add r30, r26 + adc r31, r1 + lpm r22, Z + adiw r30, 8 + lpm IDX0, Z + adiw r30, 8 + lpm IDX1, Z + push IDX1 + adiw r30, 8 + lpm IDX1, Z + + movw r24, DATA0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 16 + mov r22, IDX0 + call threefish_mix_asm /* no rcall? 
*/ + movw r24, DATA0 + adiw r24, 32 + pop r22 + ;mov r22, IDX0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 48 + mov r22, IDX1 + call threefish_mix_asm /* no rcall? */ + /* now the permutation */ + movw r26, DATA0 + movw r30, DATA0 + adiw r30, 6*8 + rcall xchg_zx8 + movw r26, DATA0 + movw r30, DATA0 + adiw r30, 2*8 + rcall xchg_zx8 + movw r26, DATA0 + adiw r26, 2*8 + movw r30, DATA0 + adiw r30, 4*8 + rcall xchg_zx8 + movw r26, DATA0 + adiw r26, 3*8 + movw r30, DATA0 + adiw r30, 7*8 + rcall xchg_zx8 + inc I + rjmp 1b + +threefish512_slut9: + .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38 + .byte 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30 + .byte 0x38, 0x40, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28 + .byte 0x30, 0x38, 0x40 +threefish512_slut3: + .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 + .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00 + .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 + +threefish512_rc0: .byte 0x5a, 0x60, 0x42, 0x32, 0x41, 0x59, 0x4b, 0x41 +threefish512_rc1: .byte 0x4a, 0x24, 0x2a, 0x14, 0x61, 0x33, 0x32, 0x63 +threefish512_rc2: .byte 0x62, 0x53, 0x29, 0x72, 0x10, 0x51, 0x13, 0x59 +threefish512_rc3: .byte 0x7b, 0x49, 0x33, 0x19, 0x52, 0x2a, 0x11, 0x43 + +add_z_to_x8: + ld r0, Z+ + ld r1, X + add r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + clr r1 + ret + +T0 = IDX0 +T1 = 0 +CNT = 24 +xchg_zx8: + ldi CNT, 8 +1: ld T0, X + ld T1, Z + st X+, T1 + st Z+, T0 + dec CNT + brne 1b + ret + + + diff --git a/threefish_mix.S b/threefish_mix.S index 2a35940..00952a3 100644 --- a/threefish_mix.S +++ b/threefish_mix.S @@ -131,6 +131,7 @@ post_bitrot: exit: ret + byte_rot_jmptable: rjmp post_byterot;ret; rjmp 
byte_rotr_0 rjmp byte_rotr_7 @@ -144,7 +145,8 @@ byte_rot_jmptable: ; 0 1 2 3 4 5 6 7 ; 1 2 3 4 5 6 7 0 - +.global byte_rotr_1 +.global byte_rotr_0 byte_rotr_1: /* 10 words */ mov r0, B0 mov B0, B1 @@ -160,7 +162,7 @@ byte_rotr_0: ; 0 1 2 3 4 5 6 7 ; 2 3 4 5 6 7 0 1 - +.global byte_rotr_2 byte_rotr_2: /* 11 words */ mov r0, B0 mov B0, B2 @@ -176,7 +178,7 @@ byte_rotr_2: /* 11 words */ ; 0 1 2 3 4 5 6 7 ; 3 4 5 6 7 0 1 2 - +.global byte_rotr_3 byte_rotr_3: /* 10 words */ mov r0, B0 mov B0, B3 @@ -191,6 +193,7 @@ byte_rotr_3: /* 10 words */ ; 0 1 2 3 4 5 6 7 ; 4 5 6 7 0 1 2 3 +.global byte_rotr_4 byte_rotr_4: /* 13 words */ mov r0, B0 mov B0, B4 @@ -211,6 +214,7 @@ byte_rotr_4: /* 13 words */ ; 0 1 2 3 4 5 6 7 ; 5 6 7 0 1 2 3 4 +.global byte_rotr_5 byte_rotr_5: /* 10 words */ mov r0, B0 mov B0, B5 @@ -225,6 +229,7 @@ byte_rotr_5: /* 10 words */ ; 0 1 2 3 4 5 6 7 ; 6 7 0 1 2 3 4 5 +.global byte_rotr_6 byte_rotr_6: /* 11 words */ mov r0, B0 mov B0, B6 @@ -241,6 +246,7 @@ byte_rotr_6: /* 11 words */ ; 0 1 2 3 4 5 6 7 ; 7 0 1 2 3 4 5 6 +.global byte_rotr_7 byte_rotr_7: /* 10 words */ mov r0, B7 mov B7, B6 @@ -253,6 +259,7 @@ byte_rotr_7: /* 10 words */ mov B0, r0 rjmp post_byterot +.global bit_rotl bit_rotl: tst vROT brne 1f @@ -270,6 +277,7 @@ bit_rotl: dec vROT rjmp bit_rotl +.global bit_rotr bit_rotr: tst vROT brne 1f