From 4147d732efd04605764a2820d4478f33a0cdf1fa Mon Sep 17 00:00:00 2001 From: bg Date: Sat, 21 Mar 2009 15:15:02 +0000 Subject: [PATCH] +threefis256 asm +ubi256 asm + skein256 asm --- config.h | 2 +- host/shavs_test.rb | 43 +++- mkfiles/skein.mk | 7 +- mkfiles/threefish.mk | 5 +- mkfiles/ubi.mk | 6 +- skein256_asm.S | 343 +++++++++++++++++++++++++++ test_src/main-skein-test.c | 11 + test_src/main-threefish-test.c | 27 ++- threefish.h | 2 + threefish1024_enc.c | 26 +-- threefish256_enc.c | 14 +- threefish256_enc_asm.S | 411 +++++++++++++++++++++++++++++++++ threefish256_enc_small.S | 350 ++++++++++++++++++++++++++++ threefish512_enc.c | 18 +- threefish_mix.S | 303 ++++++++++++++++++++++++ threefish_mix_4c.S | 328 ++++++++++++++++++++++++++ threefish_mix_c.c | 38 +++ ubi256_asm.S | 327 ++++++++++++++++++++++++++ 18 files changed, 2201 insertions(+), 60 deletions(-) create mode 100644 skein256_asm.S create mode 100644 threefish256_enc_asm.S create mode 100644 threefish256_enc_small.S create mode 100644 threefish_mix.S create mode 100644 threefish_mix_4c.S create mode 100644 threefish_mix_c.c create mode 100644 ubi256_asm.S diff --git a/config.h b/config.h index 851388f..20af845 100644 --- a/config.h +++ b/config.h @@ -35,7 +35,7 @@ #define UART_LINE_BUFFER_SIZE 40 #define UART_XON_XOFF #define UART_XON_XOFF_THRESHOLD_1 (UART_RXBUFSIZE - 24) -#define UART_XON_XOFF_THRESHOLD_2 (UART_RXBUFSIZE - 30) +#define UART_XON_XOFF_THRESHOLD_2 (UART_RXBUFSIZE - 60) #undef UART_LEDS /* diff --git a/host/shavs_test.rb b/host/shavs_test.rb index 7670543..1ce86fb 100644 --- a/host/shavs_test.rb +++ b/host/shavs_test.rb @@ -19,7 +19,7 @@ =end $debug = false - +require 'rubygems' require 'serialport' def init_system @@ -49,7 +49,18 @@ def get_md return line end +def send_md(md_string) + for i in 0..md_string.length-1 + $sp.print(md_string[i].chr) +# print(md_string[i].chr) + if(i%20==19) + sleep(0.1) + end + end +end + def run_test(filename) + errors = 0 if not File.exist?(filename) 
puts("ERROR file "+filename+" does not exist!") end @@ -68,7 +79,7 @@ def run_test(filename) end while not (file.eof or (/[\s]*Msg[\s]*=.*/.match(lb))) return if file.eof puts("DBG sending: "+lb) if $debug - $sp.print(lb.strip) + send_md(lb.strip) avr_md = get_md() begin lb=file.gets() @@ -78,10 +89,16 @@ def run_test(filename) a.upcase! b.upcase! puts("") if (pos%$linewidth==0 and $linewidth!=0) - putc((a==b)?'*':'!') + #putc((a==b)?'*':'!') + if(a==b) + putc('*') + else + putc('!') + errors += 1; + end pos += 1 end - + return errors end if ARGV.size < 6 @@ -92,17 +109,29 @@ EOF end puts("\nPort: "+ARGV[0]+ "@"+ARGV[1]+" "+ARGV[2]+"N"+ARGV[3]+"\n"); +puts("serial port interface version: " + SerialPort::VERSION); $linewidth = 64 -$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE); +$params = { "baud" => ARGV[1].to_i, + "data_bits" => ARGV[2].to_i, + "stop_bits" => ARGV[3].to_i, + "parity" => SerialPort::NONE } +$sp = SerialPort.new(ARGV[0], $params) +#$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE); + $sp.read_timeout=1000; # 5 minutes +$sp.flow_control = SerialPort::SOFT $algo_select = ARGV[4] #irb init_system() for i in (5..(ARGV.size-1)) - run_test(ARGV[i]) - puts("") + errors = run_test(ARGV[i]) + if errors == 0 + puts("[ok]") + else + puts("[errors: "+errors.to_s+"]") + end end $sp.print("EXIT\r"); diff --git a/mkfiles/skein.mk b/mkfiles/skein.mk index 59c6d58..a10e833 100644 --- a/mkfiles/skein.mk +++ b/mkfiles/skein.mk @@ -1,12 +1,13 @@ # Makefile for Skein -ALGO_NAME := SKEIN_C +ALGO_NAME := SKEIN # comment out the following line for removement of Skein from the build process HASHES += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \ - ubi256.o ubi512.o ubi1024.o memxor.o skein256.o skein512.o skein1024.o +$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o skein256_asm.o \ + threefish_mix_4c.o threefish512_enc.o 
threefish1024_enc.o \ + ubi512.o ubi1024.o memxor.o skein512.o skein1024.o $(ALGO_NAME)_TEST_BIN := main-skein-test.o debug.o uart.o hexdigit_tab.o \ dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \ hfal-basic.o hfal_skein256.o hfal_skein512.o hfal_skein1024.o shavs.o diff --git a/mkfiles/threefish.mk b/mkfiles/threefish.mk index a2e47ca..ab81e89 100644 --- a/mkfiles/threefish.mk +++ b/mkfiles/threefish.mk @@ -1,11 +1,12 @@ # Makefile for threefish -ALGO_NAME := THREEFISH_C +ALGO_NAME := THREEFISH # comment out the following line for removement of threefish from the build process BLOCK_CIPHERS += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o +$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc.o threefish1024_enc.o\ + threefish_mix.o threefish_mix_4c.o $(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \ nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := test nessie diff --git a/mkfiles/ubi.mk b/mkfiles/ubi.mk index f331cb6..18ea354 100644 --- a/mkfiles/ubi.mk +++ b/mkfiles/ubi.mk @@ -1,12 +1,12 @@ # Makefile for UBI -ALGO_NAME := UBI_C +ALGO_NAME := UBI # comment out the following line for removement of ubi from the build process HASHES += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \ - ubi256.o ubi512.o ubi1024.o memxor.o +$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o threefish512_enc.o threefish1024_enc.o \ + threefish_mix_4c.o ubi512.o ubi1024.o memxor.o $(ALGO_NAME)_TEST_BIN := main-ubi-test.o debug.o uart.o hexdigit_tab.o \ dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := test nessie diff --git a/skein256_asm.S b/skein256_asm.S new file mode 100644 index 0000000..b6b451d --- /dev/null +++ b/skein256_asm.S @@ -0,0 +1,343 @@ +/* skein256_asm.S */ +/* + This file is part of 
the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +/* +void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){ + skein_config_t conf; + uint8_t null[UBI256_BLOCKSIZE_B]; + memset(null, 0, UBI256_BLOCKSIZE_B); + memset(&conf, 0, sizeof(skein_config_t)); + conf.schema[0] = 'S'; + conf.schema[1] = 'H'; + conf.schema[2] = 'A'; + conf.schema[3] = '3'; + conf.version = 1; + conf.out_length = outsize_b; + ctx->outsize_b = outsize_b; + ubi256_init(&(ctx->ubictx), null, UBI_TYPE_CFG); + ubi256_lastBlock(&(ctx->ubictx), &conf, 256); + ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_MSG); +} +*/ +/* + * param ctx: r24:r25 + * param outsize_b: r22:r23 + */ +UBICTX0 = 2 +UBICTX1 = 3 +CONF0 = 4 +CONF1 = 5 +.global skein256_init +skein256_init: + push_range 2, 5 + stack_alloc 64-22 + adiw r30, 1 + movw CONF0, r30 + movw r26, r24 + st X+, r22 + st X+, r23 + movw UBICTX0, r26 + ldi r24, 'S' + st Z+, r24 + ldi r24, 'H' + st Z+, r24 + ldi r24, 'A' + st Z+, r24 + ldi r24, '3' + st Z+, r24 + ldi r24, 1 + st Z+, r24 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r22 + st Z+, r23 + ldi 24, 22+10 +1: st Z+, r1 + dec r24 + brne 1b + /* call 
ubi256_init*/ + sbiw r30, 32 + movw r24, UBICTX0 + movw r22, r30 + ldi r20, 4 + rcall ubi256_init + /* call ubi256_lastBlock*/ + movw r24, UBICTX0 + movw r22, CONF0 + ldi r21, 1 + clr r20 + rcall ubi256_lastBlock + /* call ubi256_init*/ + movw r24, UBICTX0 + adiw r24, 16 + movw r22, r24 + movw r24, UBICTX0 + ldi r20, 48 + rcall ubi256_init + stack_free 64-22 + pop_range 2, 5 + ret + +/******************************************************************************/ +.global skein256_nextBlock +skein256_nextBlock: + adiw r24, 2 + rjmp ubi256_nextBlock + +/******************************************************************************/ +.global skein256_lastBlock +skein256_lastBlock: + adiw r24, 2 + rjmp ubi256_lastBlock + +/******************************************************************************/ +/* +void skein256_ctx2hash(void* dest, skein256_ctx_t* ctx){ + ubi256_ctx_t uctx; + uint16_t outsize_b; + + uint64_t counter=0; + uint8_t outbuffer[UBI256_BLOCKSIZE_B]; + ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_OUT); + + outsize_b = ctx->outsize_b; + while(1){ + memcpy(&uctx, &(ctx->ubictx), sizeof(ubi256_ctx_t)); + ubi256_lastBlock(&uctx, &counter, 64); + ubi256_ctx2hash(outbuffer, &uctx); + if(outsize_b<=UBI256_BLOCKSIZE){ + memcpy(dest, outbuffer, (outsize_b+7)/8); + break; + }else{ + memcpy(dest, outbuffer, UBI256_BLOCKSIZE_B); + dest = (uint8_t*)dest + UBI256_BLOCKSIZE_B; + outsize_b -= UBI256_BLOCKSIZE; + counter++; + } + } +} +*/ +/* + * param dest: r24:r25 + * param ctx: r22:r23 + */ + OUTSIZE_B0 = 16 + OUTSIZE_B1 = 17 + UCTX0 = 14 + UCTX1 = 15 + UBICTX0 = 12 + UBICTX1 = 13 + DEST0 = 10 + DEST1 = 11 +.global skein256_ctx2hash +skein256_ctx2hash: + push_range 10, 17 + /* 48 || 8 || 32 */ + stack_alloc_large 88 /* uctx || counter || outbuffer */ + movw DEST0, r24 + adiw r30, 1 + movw UCTX0, r30 + adiw r30, 48 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + st Z+, r1 + movw r26, 22 + ld OUTSIZE_B0, X+ + ld 
OUTSIZE_B1, X+ + movw UBICTX0, r26 + /* call ubi256_init */ + movw r24, UBICTX0 + adiw r24, 16 + movw r22, r24 + movw r24, UBICTX0 + ldi r20, 63 + rcall ubi256_init + + /* main loop */ + /* copy ubictx in uctx*/ +1: movw r30, UCTX0 /* loop head: target of the 'rjmp 1b' at the end of each full-block iteration */ + movw r26, UBICTX0 + ldi r24, 48 +2: ld r25, X+ + st Z+, r25 + dec r24 + brne 2b + /* call ubi256_lastBlock */ + movw r24, UCTX0 + adiw r24, 48 + movw r22, r24 + movw r24, UCTX0 + clr r21 + ldi r20, 64 + rcall ubi256_lastBlock + /* copy uctx->g to outbuffer */ + movw r26, UCTX0 + adiw r26, 16 + movw r30, UCTX0 + adiw r30, 56 + ldi r24, 32 +2: ld r25, X+ + st Z+, r25 + dec r24 + brne 2b + /* compare outsize_b with 256*/ + cpi OUTSIZE_B1, 2 + brsh 5f /* unsigned compare (outsize_b is uint16_t): high byte >= 2 means more than one block left */ + cpi OUTSIZE_B1, 1 + brlo 3f + tst OUTSIZE_B0 + breq 3f +5: /* copy outbuffer to dest */ + movw r30, DEST0 + movw r26, UCTX0 + adiw r26, 56 + ldi r24, 32 +6: ld r25, X+ + st Z+, r25 + dec r24 + brne 6b + /* store new dest */ + movw DEST0, r30 /* Z is the advanced dest pointer; X points past the on-stack outbuffer */ + /* adjust counter and outsize_b*/ + dec OUTSIZE_B1 /* outsize_b -= 256 */ + movw r30, UCTX0 + adiw r30, 48 + ldi r24, 1 + ld r25, Z + add r25, r24 + st Z+, r25 + ldi r24, 7 +6: ld r25, Z + adc r25, r1 + st Z+, r25 + dec r24 + brne 6b + rjmp 1b +3: /* last iteration */ + movw r24, OUTSIZE_B0 + adiw r24, 7 + lsr r25 + ror r24 + lsr r24 + lsr r24 + movw r30, DEST0 + movw r26, UCTX0 + adiw r26, 56 + tst r24 + breq 8f +7: ld r25, X+ + st Z+, r25 + dec r24 + brne 7b +8: + stack_free_large 88 + pop_range 10, 17 + ret + +/******************************************************************************/ +/* +void skein256(void* dest, uint16_t outlength_b, const void* msg, uint32_t length_b){ + skein256_ctx_t ctx; + skein256_init(&ctx, outlength_b); + while(length_b>SKEIN256_BLOCKSIZE){ + skein256_nextBlock(&ctx, msg); + msg = (uint8_t*)msg + SKEIN256_BLOCKSIZE_B; + length_b -= SKEIN256_BLOCKSIZE; + } + skein256_lastBlock(&ctx, msg, length_b); + skein256_ctx2hash(dest, &ctx); +} +*/ +/* + * param dest: r24:r25 + * param outlength_b: r22:r23 + * param msg: r20:r21 + * param length_b: 
r16:r19 + */ +LENGTH_B0 = 2 +LENGTH_B1 = 3 +LENGTH_B2 = 4 +LENGTH_B3 = 5 +DEST0 = 6 +DEST1 = 7 +MSG0 = 8 +MSG1 = 9 +CTX0 = 10 +CTX1 = 11 +.global skein256 +skein256: + push_range 2, 11 + stack_alloc 50 + adiw r30, 1 + movw CTX0, r30 + movw DEST0, r24 + movw MSG0, r20 + movw LENGTH_B0, r16 + movw LENGTH_B2, r18 + /* call skein256_init */ + movw r24, r30 + rcall skein256_init +1: tst LENGTH_B2 + brne 4f + tst LENGTH_B3 + brne 4f + /* call skein256_lastBlock */ + movw r24, CTX0 + movw r22, MSG0 + movw r20, LENGTH_B0 + rcall skein256_lastBlock + /* call skein256_ctx2hash */ + movw r24, DEST0 + movw r22, CTX0 + rcall skein256_ctx2hash + /* return */ + stack_free 50 + pop_range 2, 11 + ret + +4: /* process preceeding blocks */ + movw r24, CTX0 + movw r22, MSG0 + rcall skein256_nextBlock + movw r24, MSG0 + adiw r24, 32 + movw MSG0, r24 + mov r24, LENGTH_B1 + mov r25, LENGTH_B2 + sbiw r24, 1 + sbc LENGTH_B3, r1 + mov LENGTH_B1, r24 + mov LENGTH_B2, r25 + rjmp 1b + diff --git a/test_src/main-skein-test.c b/test_src/main-skein-test.c index aa68950..79bc3ff 100644 --- a/test_src/main-skein-test.c +++ b/test_src/main-skein-test.c @@ -53,16 +53,19 @@ void testrun_stdtest_skein256(uint16_t outsize_b){ message[i] = 0xFF-i; cli_putstr_P(PSTR("\r\nmessage: ")); + cli_hexdump(message, 1); skein256(hash, outsize_b, message, 8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); cli_putstr_P(PSTR("\r\nmessage:")); + cli_hexdump_block(message, 32, 4, 16); skein256(hash, outsize_b, message, 32*8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); cli_putstr_P(PSTR("\r\nmessage:")); + cli_hexdump_block(message, 64, 4, 16); skein256(hash, outsize_b, message, 64*8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); @@ -84,11 +87,13 @@ void testrun_stdtest_skein512(uint16_t outsize_b){ cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); cli_putstr_P(PSTR("\r\nmessage:")); + cli_hexdump_block(message, 
64, 4, 16); skein512(hash, outsize_b, message, 64*8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); cli_putstr_P(PSTR("\r\nmessage:")); + cli_hexdump_block(message, 128, 4, 16); skein512(hash, outsize_b, message, 128*8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); @@ -110,6 +115,7 @@ void testrun_stdtest_skein1024(uint16_t outsize_b){ cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); cli_putstr_P(PSTR("\r\nmessage:")); + cli_hexdump_block(message, 128, 4, 16); skein1024(hash, outsize_b, message, 128*8); cli_putstr_P(PSTR("\r\nhash:")); cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16); @@ -228,7 +234,12 @@ int main (void){ for(;;){ cli_putstr_P(PSTR("\r\n\r\nCrypto-VS (")); cli_putstr(algo_name); + cli_putstr_P(PSTR("; ")); + cli_putstr(__DATE__); + cli_putstr_P(PSTR(" ")); + cli_putstr(__TIME__); cli_putstr_P(PSTR(")\r\nloaded and running\r\n")); + cmd_interface(cmdlist); } } diff --git a/test_src/main-threefish-test.c b/test_src/main-threefish-test.c index da964b3..74a2aca 100644 --- a/test_src/main-threefish-test.c +++ b/test_src/main-threefish-test.c @@ -61,7 +61,17 @@ void testrun_stdtest_threefish256(void){ threefish256_enc(data, &ctx); cli_putstr_P(PSTR("\r\ncipher: ")); cli_hexdump(data, 32); - + /* + cli_hexdump_rev(data, 8); + cli_putc(' '); + cli_hexdump_rev(data+8, 8); + cli_putc(' '); + cli_hexdump_rev(data+16, 8); + cli_putc(' '); + cli_hexdump_rev(data+24, 8); + cli_putc(' '); + */ + /* second test */ for(i=0; i<32; ++i){ key[i] = 0x10+i; data[i] = 0xFF-i; @@ -323,18 +333,33 @@ void testrun_performance_threefish(void){ testrun_performance_threefish1024(); } +void init_test(void){ + threefish256_ctx_t ctx; + uint8_t key[32], tweak[16]; + memset(key, 0,32); + memset(tweak, 0,16); + threefish256_init(key, tweak, &ctx); + cli_putstr_P(PSTR("\r\n ctx: \r\n\tk:")); + cli_hexdump(ctx.k, 5*8); + cli_putstr_P(PSTR("\r\n\tt:")); + cli_hexdump(ctx.t, 3*8); +} + + 
/***************************************************************************** * main * *****************************************************************************/ const char nessie_str[] PROGMEM = "nessie"; const char test_str[] PROGMEM = "test"; +const char inittest_str[] PROGMEM = "inittest"; const char performance_str[] PROGMEM = "performance"; const char echo_str[] PROGMEM = "echo"; cmdlist_entry_t cmdlist[] PROGMEM = { // { nessie_str, NULL, testrun_nessie_noekeon}, { test_str, NULL, testrun_stdtest_threefish}, + { inittest_str, NULL, init_test}, { performance_str, NULL, testrun_performance_threefish}, { echo_str, (void*)1, (void_fpt)echo_ctrl}, { NULL, NULL, NULL} diff --git a/threefish.h b/threefish.h index 618babe..0d3a413 100644 --- a/threefish.h +++ b/threefish.h @@ -54,6 +54,8 @@ typedef struct{ } threefish1024_ctx_t; +void threefish_mix(void* data, uint8_t rot); +void threefish256_init_c(void* key, void* tweak, threefish256_ctx_t* ctx); void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx); void threefish512_init(void* key, void* tweak, threefish512_ctx_t* ctx); diff --git a/threefish1024_enc.c b/threefish1024_enc.c index 2fa1957..4f6f9d4 100644 --- a/threefish1024_enc.c +++ b/threefish1024_enc.c @@ -30,16 +30,6 @@ #include #include "threefish.h" -#define X0 (((uint64_t*)data)[0]) -#define X1 (((uint64_t*)data)[1]) -static -void mix(void* data, uint8_t rot){ - uint64_t x; - x = X1; - X0 += x; - X1 = ((x<>(64-rot))) ^ X0; -} - #define X(a) (((uint64_t*)data)[(a)]) static @@ -108,14 +98,14 @@ void threefish1024_enc(void* data, threefish1024_ctx_t* ctx){ add_key_16(data, ctx, s); ++s; } - mix((uint8_t*)data + 0, r0[i%8]); - mix((uint8_t*)data + 16, r1[i%8]); - mix((uint8_t*)data + 32, r2[i%8]); - mix((uint8_t*)data + 48, r3[i%8]); - mix((uint8_t*)data + 64, r4[i%8]); - mix((uint8_t*)data + 80, r5[i%8]); - mix((uint8_t*)data + 96, r6[i%8]); - mix((uint8_t*)data +112, r7[i%8]); + threefish_mix((uint8_t*)data + 0, r0[i%8]); + 
threefish_mix((uint8_t*)data + 16, r1[i%8]); + threefish_mix((uint8_t*)data + 32, r2[i%8]); + threefish_mix((uint8_t*)data + 48, r3[i%8]); + threefish_mix((uint8_t*)data + 64, r4[i%8]); + threefish_mix((uint8_t*)data + 80, r5[i%8]); + threefish_mix((uint8_t*)data + 96, r6[i%8]); + threefish_mix((uint8_t*)data +112, r7[i%8]); permute_16(data); ++i; }while(i!=80); diff --git a/threefish256_enc.c b/threefish256_enc.c index be0d5db..afb1a25 100644 --- a/threefish256_enc.c +++ b/threefish256_enc.c @@ -30,16 +30,6 @@ #include #include "threefish.h" -#define X0 (((uint64_t*)data)[0]) -#define X1 (((uint64_t*)data)[1]) -static -void mix(void* data, uint8_t rot){ - uint64_t x; - x = X1; - X0 += x; - X1 = ((x<<rot)|(x>>(64-rot))) ^ X0; -} - #define X(a) (((uint64_t*)data)[(a)]) static void permute_4(void* data){ @@ -82,8 +72,8 @@ void threefish256_enc(void* data, threefish256_ctx_t* ctx){ add_key_4(data, ctx, s); ++s; } - mix(data, r0[i%8]); - mix((uint8_t*)data + 16, r1[i%8]); + threefish_mix(data, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); permute_4(data); ++i; }while(i!=72); diff --git a/threefish256_enc_asm.S b/threefish256_enc_asm.S new file mode 100644 index 0000000..7fe3471 --- /dev/null +++ b/threefish256_enc_asm.S @@ -0,0 +1,411 @@ +/* threefish256_enc_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +A0 = 14 +A1 = 15 +A2 = 16 +A3 = 17 +A4 = 18 +A5 = 19 +A6 = 20 +A7 = 21 +/* +#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * / + +#define K(s) (((uint64_t*)key)[(s)]) +#define T(s) (((uint64_t*)tweak)[(s)]) + +void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){ + memcpy(ctx->k, key, 4*8); + memcpy(ctx->t, tweak, 2*8); + uint8_t i; + ctx->k[4] = THREEFISH_KEY_CONST; + for(i=0; i<4; ++i){ + ctx->k[4] ^= K(i); + } + ctx->t[2] = T(0) ^ T(1); +} +*/ +/* + * param key: r24:r25 + * param tweak: r22:r23 + * param ctx: r20:r21 + */ +.global threefish256_init +threefish256_init: + push_range 14, 17 + movw r30, r20 + movw r26, r24 + ldi r24, 4 + ldi A7, 0x55 + mov A6, A7 + movw A4, A6 + movw A2, A6 + movw A0, A6 +1: + ld r0, X+ + st Z+, r0 + eor A0, r0 + ld r0, X+ + st Z+, r0 + eor A1, r0 + ld r0, X+ + st Z+, r0 + eor A2, r0 + ld r0, X+ + st Z+, r0 + eor A3, r0 + ld r0, X+ + st Z+, r0 + eor A4, r0 + ld r0, X+ + st Z+, r0 + eor A5, r0 + ld r0, X+ + st Z+, r0 + eor A6, r0 + ld r0, X+ + st Z+, r0 + eor A7, r0 + dec r24 + brne 1b + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + /* now the tweak */ + movw r26, r22 + ld A0, X+ + ld A1, X+ + ld A2, X+ + ld A3, X+ + ld A4, X+ + ld A5, X+ + ld A6, X+ + ld A7, X+ + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + ld r0, X+ + eor A0, r0 + st Z+, r0 + ld r0, X+ + eor A1, r0 + st Z+, r0 + ld r0, X+ + eor A2, r0 + st Z+, r0 + ld r0, X+ + eor A3, r0 + st Z+, r0 + ld r0, X+ + eor A4, r0 + st Z+, r0 + ld r0, X+ + eor A5, r0 + st Z+, r0 + ld r0, X+ + eor A6, r0 + st Z+, r0 + ld r0, X+ + eor A7, r0 + st Z+, r0 + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + 
st Z+, A6 + st Z+, A7 + pop_range 14, 17 + ret + +/******************************************************************************/ +/* +#define X(a) (((uint64_t*)data)[(a)]) +void permute_4(void* data){ + uint64_t t; + t = X(1); + X(1) = X(3); + X(3) = t; +} +void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * / + X(0) += ctx->k[(s+0)%5]; + X(1) += ctx->k[(s+1)%5] + ctx->t[s%3]; + X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3]; + X(3) += ctx->k[(s+3)%5] + s; +} +void threefish256_enc(void* data, threefish256_ctx_t* ctx){ + uint8_t i=0,s=0; + uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59}; + uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50}; + do{ + if(i%4==0){ + add_key_4(data, ctx, s); + ++s; + } + threefish_mix(data, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); + permute_4(data); + ++i; + }while(i!=72); + add_key_4(data, ctx, s); +} +*/ +I = 2 +S = 3 +DATA0 = 4 +DATA1 = 5 +CTX0 = 6 +CTX1 = 7 +IDX0 = 8 +IDX1 = 9 +IDX2 = 10 +IDX3 = 11 +/* + * param data: r24:r25 + * param ctx: r22:r23 + */ +.global threefish256_enc +threefish256_enc: + push r28 + push r29 + push_range 2, 17 + movw DATA0, r24 + movw CTX0, r22 + clr I + clr S +1: + mov r30, I + andi r30, 0x03 + breq 2f + rjmp 4f +2: + ldi r30, lo8(threefish256_slut5) + ldi r31, hi8(threefish256_slut5) + mov r26, S + add r30, r26 + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z+ + lpm IDX2, Z+ + lpm IDX3, Z + movw r30, CTX0 + movw r26, DATA0 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX2 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX3 + adc r31, r1 + rcall add_z_to_x8 + + /* now the remaining key */ + sbiw r26, 3*8 + ldi r30, lo8(threefish256_slut3) + ldi r31, hi8(threefish256_slut3) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z + movw r30, CTX0 + adiw r30, 5*8 + movw IDX2, r30 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, IDX2 + add r30, IDX1 + 
adc r31, r1 + rcall add_z_to_x8 + ld r0, X + add r0, S + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + ld r0, X + adc r0, r1 + st X+, r0 + inc S + mov r26, S + cpi r26, 19 + brmi 4f +exit: + pop_range 2, 17 + pop r29 + pop r28 + ret +4: + /* call mix */ + ldi r30, lo8(threefish256_rc0) + ldi r31, hi8(threefish256_rc0) + mov r26, I + andi r26, 0x07 + add r30, r26 + adc r31, r1 + lpm r22, Z + adiw r30, 8 + lpm IDX0, Z + movw r24, DATA0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 16 + mov r22, IDX0 + call threefish_mix_asm /* no rcall? */ + /* now the permutation */ + movw r26, DATA0 + adiw r26, 8 + movw r30, r26 + adiw r30, 16 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + inc I + rjmp 1b + +threefish256_slut5: + .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10 + .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00 + .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10 +threefish256_slut3: + .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 + .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00 + .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 +;threefish256_rc0: .byte 5, 36, 13, 58, 26, 53, 11, 59 +;threefish256_rc1: .byte 56, 28, 46, 44, 20, 35, 42, 50 +threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73 +threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62 + +add_z_to_x8: + ld r0, Z+ + ld r1, X + add r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + 
adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + clr r1 + ret + + + + + + + + + + diff --git a/threefish256_enc_small.S b/threefish256_enc_small.S new file mode 100644 index 0000000..c1b1152 --- /dev/null +++ b/threefish256_enc_small.S @@ -0,0 +1,350 @@ +/* threefish256_enc_small.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +A0 = 14 +A1 = 15 +A2 = 16 +A3 = 17 +A4 = 18 +A5 = 19 +A6 = 20 +A7 = 21 +/* +#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * / + +#define K(s) (((uint64_t*)key)[(s)]) +#define T(s) (((uint64_t*)tweak)[(s)]) + +void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){ + memcpy(ctx->k, key, 4*8); + memcpy(ctx->t, tweak, 2*8); + uint8_t i; + ctx->k[4] = THREEFISH_KEY_CONST; + for(i=0; i<4; ++i){ + ctx->k[4] ^= K(i); + } + ctx->t[2] = T(0) ^ T(1); +} +*/ +/* + * param key: r24:r25 + * param tweak: r22:r23 + * param ctx: r20:r21 + */ +.global threefish256_init +threefish256_init: + push_range 14, 17 + movw r30, r20 + movw r26, r24 + ldi r24, 4 + ldi A7, 0x55 + mov A6, A7 + movw A4, A6 + movw A2, A6 + movw A0, A6 +1: + ld r0, X+ + st Z+, r0 + eor A0, r0 + ld r0, X+ + st Z+, r0 + eor A1, r0 + ld r0, X+ + st Z+, r0 + eor A2, r0 + ld r0, X+ + st Z+, r0 + eor A3, r0 + ld r0, X+ + st Z+, r0 + eor A4, r0 + ld r0, X+ + st Z+, r0 + eor A5, r0 + ld r0, X+ + st Z+, r0 + eor A6, r0 + ld r0, X+ + st Z+, r0 + eor A7, r0 + dec r24 + brne 1b + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + /* now the tweak */ + movw r26, r22 + ld A0, X+ + ld A1, X+ + ld A2, X+ + ld A3, X+ + ld A4, X+ + ld A5, X+ + ld A6, X+ + ld A7, X+ + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + st Z+, A6 + st Z+, A7 + ld r0, X+ + eor A0, r0 + st Z+, r0 + ld r0, X+ + eor A1, r0 + st Z+, r0 + ld r0, X+ + eor A2, r0 + st Z+, r0 + ld r0, X+ + eor A3, r0 + st Z+, r0 + ld r0, X+ + eor A4, r0 + st Z+, r0 + ld r0, X+ + eor A5, r0 + st Z+, r0 + ld r0, X+ + eor A6, r0 + st Z+, r0 + ld r0, X+ + eor A7, r0 + st Z+, r0 + st Z+, A0 + st Z+, A1 + st Z+, A2 + st Z+, A3 + st Z+, A4 + st Z+, A5 + 
st Z+, A6 + st Z+, A7 + pop_range 14, 17 + ret + +/******************************************************************************/ +/* +#define X(a) (((uint64_t*)data)[(a)]) +void permute_4(void* data){ + uint64_t t; + t = X(1); + X(1) = X(3); + X(3) = t; +} +void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * / + X(0) += ctx->k[(s+0)%5]; + X(1) += ctx->k[(s+1)%5] + ctx->t[s%3]; + X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3]; + X(3) += ctx->k[(s+3)%5] + s; +} +void threefish256_enc(void* data, threefish256_ctx_t* ctx){ + uint8_t i=0,s=0; + uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59}; + uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50}; + do{ + if(i%4==0){ + add_key_4(data, ctx, s); + ++s; + } + threefish_mix(data, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); + permute_4(data); + ++i; + }while(i!=72); + add_key_4(data, ctx, s); +} +*/ +I = 2 +S = 3 +DATA0 = 4 +DATA1 = 5 +CTX0 = 6 +CTX1 = 7 +IDX0 = 8 +IDX1 = 9 +IDX2 = 10 +IDX3 = 11 +/* + * param data: r24:r25 + * param ctx: r22:r23 + */ +.global threefish256_enc +threefish256_enc: + push r28 + push r29 + push_range 2, 17 + movw DATA0, r24 + movw CTX0, r22 + clr I + clr S +1: + mov r30, I + andi r30, 0x03 + breq 2f + rjmp 4f +2: + ldi r30, lo8(threefish256_slut5) + ldi r31, hi8(threefish256_slut5) + mov r26, S + add r30, r26 + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z+ + lpm IDX2, Z+ + lpm IDX3, Z + movw r30, CTX0 + movw r26, DATA0 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX1 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX2 + adc r31, r1 + rcall add_z_to_x8 + movw r30, CTX0 + add r30, IDX3 + adc r31, r1 + rcall add_z_to_x8 + + /* now the remaining key */ + sbiw r26, 3*8 + ldi r30, lo8(threefish256_slut3) + ldi r31, hi8(threefish256_slut3) + add r30, S + adc r31, r1 + lpm IDX0, Z+ + lpm IDX1, Z + movw r30, CTX0 + adiw r30, 5*8 + movw IDX2, r30 + add r30, IDX0 + adc r31, r1 + rcall add_z_to_x8 + movw r30, IDX2 + add r30, IDX1 + 
adc r31, r1 + rcall add_z_to_x8 + ld r0, X + add r0, S + st X+, r0 + ldi r16, 7 +3: + ld r0, X + adc r0, r1 + st X+, r0 + dec r16 + brne 3b + inc S + mov r26, S + cpi r26, 19 + brmi 4f +exit: + pop_range 2, 17 + pop r29 + pop r28 + ret +4: + /* call mix */ + ldi r30, lo8(threefish256_rc0) + ldi r31, hi8(threefish256_rc0) + mov r26, I + andi r26, 0x07 + add r30, r26 + adc r31, r1 + lpm r22, Z + adiw r30, 8 + lpm IDX0, Z + movw r24, DATA0 + call threefish_mix_asm /* no rcall? */ + movw r24, DATA0 + adiw r24, 16 + mov r22, IDX0 + call threefish_mix_asm /* no rcall? */ + /* now the permutation */ + movw r26, DATA0 + adiw r26, 8 + movw r30, r26 + adiw r30, 16 + ldi r16, 8 +3: ld IDX0, X + ld IDX1, Z + st X+, IDX1 + st Z+, IDX0 + dec r16 + brne 3b + inc I + rjmp 1b + +threefish256_slut5: + .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10 + .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00 + .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10 +threefish256_slut3: + .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 + .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00 + .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08 +threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73 +threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62 + +add_z_to_x8: + ld r0, Z+ + ld r1, X + add r1, r0 + st X+, r1 + ldi r16, 7 +1: + ld r0, Z+ + ld r1, X + adc r1, r0 + st X+, r1 + dec r16 + brne 1b + clr r1 + ret + + + + + + + + + + diff --git a/threefish512_enc.c b/threefish512_enc.c index abd1afc..abb8509 100644 --- a/threefish512_enc.c +++ b/threefish512_enc.c @@ -30,18 +30,10 @@ #include #include "threefish.h" -#define X0 (((uint64_t*)data)[0]) -#define X1 (((uint64_t*)data)[1]) -static -void mix(void* data, uint8_t rot){ - uint64_t x; - x = X1; - X0 += x; - X1 = ((x<>(64-rot))) ^ X0; -} #define X(a) (((uint64_t*)data)[(a)]) + static void permute_8(void* data){ uint64_t t; @@ -107,10 +99,10 @@ void threefish512_enc(void* data, threefish512_ctx_t* ctx){ 
add_key_8(data, ctx, s); ++s; } - mix((uint8_t*)data + 0, r0[i%8]); - mix((uint8_t*)data + 16, r1[i%8]); - mix((uint8_t*)data + 32, r2[i%8]); - mix((uint8_t*)data + 48, r3[i%8]); + threefish_mix((uint8_t*)data + 0, r0[i%8]); + threefish_mix((uint8_t*)data + 16, r1[i%8]); + threefish_mix((uint8_t*)data + 32, r2[i%8]); + threefish_mix((uint8_t*)data + 48, r3[i%8]); permute_8(data); ++i; }while(i!=72); diff --git a/threefish_mix.S b/threefish_mix.S new file mode 100644 index 0000000..2a35940 --- /dev/null +++ b/threefish_mix.S @@ -0,0 +1,303 @@ +/* threefish_mix.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/* +#define B0 (((uint64_t*)data)[0]) +#define B1 (((uint64_t*)data)[1]) +static +void mix(void* data, uint8_t rot){ + uint64_t x; + x = B1; + B0 += x; + B1 = ((x<<rot)|(x>>(64-rot))) ^ B0; +} +*/ +A0 = 10 +A1 = 11 +A2 = 12 +A3 = 13 +A4 = 14 +A5 = 15 +A6 = 16 +A7 = 17 + +B0 = 18 +B1 = 19 +B2 = 20 +B3 = 21 +B4 = 22 +B5 = 23 +B6 = 24 +B7 = 25 +vROT = 27 +/* + * param data: r24:r25 + * param rot: r22 + */ + +.global threefish_mix_asm +threefish_mix_asm: /* in-place mix of the 16 bytes at data; rot is an encoded rotation (bits 4..6: jump-table index, bit 3: direction, bits 0..2: bit count); nothing is pushed here, so r10-r17, r18-r27, r30/r31 and Y are overwritten */ + movw r28, r24 + mov vROT,r22 + ldd A0, Y+ 0 + ldd A1, Y+ 1 + ldd A2, Y+ 2 + ldd A3, Y+ 3 + ldd A4, Y+ 4 + ldd A5, Y+ 5 + ldd A6, Y+ 6 + ldd A7, Y+ 7 + ldd B0, Y+ 8 + ldd B1, Y+ 9 + ldd B2, Y+10 + ldd B3, Y+11 + ldd B4, Y+12 + ldd B5, Y+13 + ldd B6, Y+14 + ldd B7, Y+15 + add A0, B0 + adc A1, B1 + adc A2, B2 + adc A3, B3 + adc A4, B4 + adc A5, B5 + adc A6, B6 + adc A7, B7 /* A = first word + second word (64-bit add) */ + + mov r26, vROT + swap r26 + andi r26, 0x07 /* bits 4..6 of rot select the byte_rot_jmptable entry */ + ldi r30, pm_lo8(byte_rot_jmptable) + ldi r31, pm_hi8(byte_rot_jmptable) + add r30, r26 + adc r31, r1 + ijmp +post_byterot: + bst vROT, 3 /* T = bit 3 of rot: set -> bit_rotr, clear -> bit_rotl */ + andi vROT, 0x07 /* remaining single-bit rotations */ + brts 1f + rjmp bit_rotl +1: rjmp bit_rotr +post_bitrot: + eor B0, A0 + eor B1, A1 + eor B2, A2 + eor B3, A3 + eor B4, A4 + eor B5, A5 + eor B6, A6 + eor B7, A7 /* B = rotated second word ^ new first word */ + + std Y+ 0, A0 + std Y+ 1, A1 + std Y+ 2, A2 + std Y+ 3, A3 + std Y+ 4, A4 + std Y+ 5, A5 + std Y+ 6, A6 + std Y+ 7, A7 + std Y+ 8, B0 + std Y+ 9, B1 + std Y+10, B2 + std Y+11, B3 + std Y+12, B4 + std Y+13, B5 + std Y+14, B6 + std Y+15, B7 +exit: + ret + +byte_rot_jmptable: + rjmp post_byterot;ret; rjmp byte_rotr_0 + rjmp byte_rotr_7 + rjmp byte_rotr_6 + rjmp byte_rotr_5 + rjmp byte_rotr_4 + rjmp byte_rotr_3 + rjmp byte_rotr_2 + rjmp byte_rotr_1 + rjmp post_byterot;ret; rjmp byte_rotr_0 + +; 0 1 2 3 4 5 6 7 +; 1 2 3 4 5 6 7 0 + +byte_rotr_1: /* 10 words */ + mov r0, B0 + mov B0, B1 + mov B1, B2 + mov B2, B3 + mov B3, B4 + mov B4, B5 + mov B5, B6 + mov
B6, B7 + mov B7, r0 +byte_rotr_0: + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 2 3 4 5 6 7 0 1 + +byte_rotr_2: /* 11 words */ + mov r0, B0 + mov B0, B2 + mov B2, B4 + mov B4, B6 + mov B6, r0 + mov r0, B1 + mov B1, B3 + mov B3, B5 + mov B5, B7 + mov B7, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 3 4 5 6 7 0 1 2 + +byte_rotr_3: /* 10 words */ + mov r0, B0 + mov B0, B3 + mov B3, B6 + mov B6, B1 + mov B1, B4 + mov B4, B7 + mov B7, B2 + mov B2, B5 + mov B5, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 4 5 6 7 0 1 2 3 +byte_rotr_4: /* 13 words */ + mov r0, B0 + mov B0, B4 + mov B4, r0 + + mov r0, B1 + mov B1, B5 + mov B5, r0 + + mov r0, B2 + mov B2, B6 + mov B6, r0 + + mov r0, B3 + mov B3, B7 + mov B7, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 5 6 7 0 1 2 3 4 +byte_rotr_5: /* 10 words */ + mov r0, B0 + mov B0, B5 + mov B5, B2 + mov B2, B7 + mov B7, B4 + mov B4, B1 + mov B1, B6 + mov B6, B3 + mov B3, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 6 7 0 1 2 3 4 5 +byte_rotr_6: /* 11 words */ + mov r0, B0 + mov B0, B6 + mov B6, B4 + mov B4, B2 + mov B2, r0 + + mov r0, B1 + mov B1, B7 + mov B7, B5 + mov B5, B3 + mov B3, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 7 0 1 2 3 4 5 6 +byte_rotr_7: /* 10 words */ + mov r0, B7 + mov B7, B6 + mov B6, B5 + mov B5, B4 + mov B4, B3 + mov B3, B2 + mov B2, B1 + mov B1, B0 + mov B0, r0 + rjmp post_byterot + +bit_rotl: + tst vROT + brne 1f + rjmp post_bitrot +1: mov r0, B7 /* the rol below moves the top bit of B7 into carry, so the chain rotates all 64 bits left by one */ + rol r0 + rol B0 + rol B1 + rol B2 + rol B3 + rol B4 + rol B5 + rol B6 + rol B7 + dec vROT + rjmp bit_rotl + +bit_rotr: + tst vROT + brne 1f + rjmp post_bitrot +1: mov r0, B0 /* the ror below moves the bottom bit of B0 into carry, so the chain rotates all 64 bits right by one */ + ror r0 + ror B7 + ror B6 + ror B5 + ror B4 + ror B3 + ror B2 + ror B1 + ror B0 + dec vROT + rjmp bit_rotr + + + + + + + + + + + + + + + diff --git a/threefish_mix_4c.S b/threefish_mix_4c.S new file mode 100644 index 0000000..063232e --- /dev/null +++ b/threefish_mix_4c.S @@ -0,0 +1,328 @@ +/* threefish_mix_4c.S */ +/* + This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/* +#define B0 (((uint64_t*)data)[0]) +#define B1 (((uint64_t*)data)[1]) +static +void mix(void* data, uint8_t rot){ + uint64_t x; + x = B1; + B0 += x; + B1 = ((x<<rot)|(x>>(64-rot))) ^ B0; +} +*/ +A0 = 10 +A1 = 11 +A2 = 12 +A3 = 13 +A4 = 14 +A5 = 15 +A6 = 16 +A7 = 17 + +B0 = 18 +B1 = 19 +B2 = 20 +B3 = 21 +B4 = 22 +B5 = 23 +B6 = 24 +B7 = 25 +vROT = 27 +/* + * param data: r24:r25 + * param rot: r22 + */ + +.global threefish_mix +threefish_mix: /* in-place mix of the 16 bytes at data; rot is a plain rotation count that is split into a byte rotation and a bit rotation below; r10-r17 and Y are saved and restored */ + push r28 + push r29 + push_range 10, 17 + movw r28, r24 + mov vROT,r22 + ldd A0, Y+ 0 + ldd A1, Y+ 1 + ldd A2, Y+ 2 + ldd A3, Y+ 3 + ldd A4, Y+ 4 + ldd A5, Y+ 5 + ldd A6, Y+ 6 + ldd A7, Y+ 7 + ldd B0, Y+ 8 + ldd B1, Y+ 9 + ldd B2, Y+10 + ldd B3, Y+11 + ldd B4, Y+12 + ldd B5, Y+13 + ldd B6, Y+14 + ldd B7, Y+15 + add A0, B0 + adc A1, B1 + adc A2, B2 + adc A3, B3 + adc A4, B4 + adc A5, B5 + adc A6, B6 + adc A7, B7 /* A = first word + second word (64-bit add) */ + + mov r26, vROT + adiw r26, 3 /* NOTE(review): adiw works on the r27:r26 pair and vROT is r27; it stays intact only while r26 + 3 does not carry */ + lsr r26 + lsr r26 + lsr r26 /* r26 = (rot + 3) >> 3: index into byte_rot_jmptable */ +; andi r26, 0x07 + ldi r30, pm_lo8(byte_rot_jmptable) + ldi r31, pm_hi8(byte_rot_jmptable) + add r30, r26 + adc r31, r1 + ijmp +post_byterot: + ldi r30, lo8(bit_rot_lut) + ldi r31, hi8(bit_rot_lut) + andi vROT, 0x07 + add r30, vROT + adc r31, r1 + lpm r27, Z /* table entry for rot & 7: bit 7 = rotate right, low bits = bit count */ + bst r27, 7 +
andi r27, 0x07 + brts 1f + rjmp bit_rotl +1: rjmp bit_rotr +post_bitrot: + eor B0, A0 + eor B1, A1 + eor B2, A2 + eor B3, A3 + eor B4, A4 + eor B5, A5 + eor B6, A6 + eor B7, A7 /* B = rotated second word ^ new first word */ + + std Y+ 0, A0 + std Y+ 1, A1 + std Y+ 2, A2 + std Y+ 3, A3 + std Y+ 4, A4 + std Y+ 5, A5 + std Y+ 6, A6 + std Y+ 7, A7 + std Y+ 8, B0 + std Y+ 9, B1 + std Y+10, B2 + std Y+11, B3 + std Y+12, B4 + std Y+13, B5 + std Y+14, B6 + std Y+15, B7 +exit: + pop_range 10, 17 + pop r29 + pop r28 + ret + +bit_rot_lut: + .byte 0x00 + .byte 0x01 + .byte 0x02 + .byte 0x03 + .byte 0x04 + .byte 0x83 + .byte 0x82 + .byte 0x81 + +byte_rot_jmptable: + rjmp post_byterot;ret; rjmp byte_rotr_0 + rjmp byte_rotr_7 + rjmp byte_rotr_6 + rjmp byte_rotr_5 + rjmp byte_rotr_4 + rjmp byte_rotr_3 + rjmp byte_rotr_2 + rjmp byte_rotr_1 + rjmp post_byterot;ret; rjmp byte_rotr_0 + +; 0 1 2 3 4 5 6 7 +; 1 2 3 4 5 6 7 0 + +byte_rotr_1: /* 10 words */ + mov r0, B0 + mov B0, B1 + mov B1, B2 + mov B2, B3 + mov B3, B4 + mov B4, B5 + mov B5, B6 + mov B6, B7 + mov B7, r0 +byte_rotr_0: + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 2 3 4 5 6 7 0 1 + +byte_rotr_2: /* 11 words */ + mov r0, B0 + mov B0, B2 + mov B2, B4 + mov B4, B6 + mov B6, r0 + mov r0, B1 + mov B1, B3 + mov B3, B5 + mov B5, B7 + mov B7, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 3 4 5 6 7 0 1 2 + +byte_rotr_3: /* 10 words */ + mov r0, B0 + mov B0, B3 + mov B3, B6 + mov B6, B1 + mov B1, B4 + mov B4, B7 + mov B7, B2 + mov B2, B5 + mov B5, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 4 5 6 7 0 1 2 3 +byte_rotr_4: /* 13 words */ + mov r0, B0 + mov B0, B4 + mov B4, r0 + + mov r0, B1 + mov B1, B5 + mov B5, r0 + + mov r0, B2 + mov B2, B6 + mov B6, r0 + + mov r0, B3 + mov B3, B7 + mov B7, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 5 6 7 0 1 2 3 4 +byte_rotr_5: /* 10 words */ + mov r0, B0 + mov B0, B5 + mov B5, B2 + mov B2, B7 + mov B7, B4 + mov B4, B1 + mov B1, B6 + mov B6, B3 + mov B3, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 6 7 0 1 2 3 4 5 +byte_rotr_6: /* 11
words */ + mov r0, B0 + mov B0, B6 + mov B6, B4 + mov B4, B2 + mov B2, r0 + + mov r0, B1 + mov B1, B7 + mov B7, B5 + mov B5, B3 + mov B3, r0 + rjmp post_byterot + +; 0 1 2 3 4 5 6 7 +; 7 0 1 2 3 4 5 6 +byte_rotr_7: /* 10 words */ + mov r0, B7 + mov B7, B6 + mov B6, B5 + mov B5, B4 + mov B4, B3 + mov B3, B2 + mov B2, B1 + mov B1, B0 + mov B0, r0 + rjmp post_byterot + +bit_rotl: + tst r27 + brne 1f + rjmp post_bitrot +1: mov r0, B7 /* the rol below moves the top bit of B7 into carry, so the chain rotates all 64 bits left by one */ + rol r0 + rol B0 + rol B1 + rol B2 + rol B3 + rol B4 + rol B5 + rol B6 + rol B7 + dec r27 + rjmp bit_rotl + +bit_rotr: + tst r27 + brne 1f + rjmp post_bitrot +1: mov r0, B0 /* the ror below moves the bottom bit of B0 into carry, so the chain rotates all 64 bits right by one */ + ror r0 + ror B7 + ror B6 + ror B5 + ror B4 + ror B3 + ror B2 + ror B1 + ror B0 + dec r27 + rjmp bit_rotr + + + + + + + + + + + + + + + diff --git a/threefish_mix_c.c b/threefish_mix_c.c new file mode 100644 index 0000000..2031bc8 --- /dev/null +++ b/threefish_mix_c.c @@ -0,0 +1,38 @@ +/* threefish_mix_c.c */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + * + * + * + */ + +#include + +#define X0 (((uint64_t*)data)[0]) +#define X1 (((uint64_t*)data)[1]) +void threefish_mix(void* data, uint8_t rot){ + uint64_t x; + x = X1; + X0 += x; + X1 = ((x<>(64-rot))) ^ X0; +} diff --git a/ubi256_asm.S b/ubi256_asm.S new file mode 100644 index 0000000..e5e6f9c --- /dev/null +++ b/ubi256_asm.S @@ -0,0 +1,327 @@ +/* ubi256_asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-03-16 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +/* +void ubi256_init(ubi256_ctx_t* ctx, const void* g, uint8_t type){ + memset(ctx->tweak, 0, 15); + ctx->tweak[15] = 0x40+type; + memcpy(ctx->g, g, 32); +} +*/ +/* + * param ctx: r24:r25 + * param g: r22:r23 + * param type: r20 + */ +.global ubi256_init +ubi256_init: /* zeroes the first 15 context bytes, stores the type byte as the 16th and copies the 32 bytes at g behind them */ + movw r26, r24 + ldi r21, 15 +1: st X+, r1 + dec r21 + brne 1b + ori r20, 0x40 /* NOTE(review): the C reference adds 0x40; OR gives the same value only while bit 6 of type is clear */ + st X+, r20 + ldi r21, 32 + movw r30, r22 +2: ld r20, Z+ + st X+, r20 + dec r21 + brne 2b + ret + +/******************************************************************************/ +/* +void ubi256_ctx2hash(void* dest, const ubi256_ctx_t* ctx){ + memcpy(dest, ctx->g, UBI256_BLOCKSIZE_B); +} +*/ +/* + * param dest: r24:r25 + * param ctx: r22:r23 + */ +.global ubi256_ctx2hash +ubi256_ctx2hash: /* copies 32 bytes starting at ctx+16 to dest */ + movw r26, r24 + movw r30, r22 + adiw r30, 16 /* step over the 16 tweak bytes to ctx->g */ + ldi r22, 32 +1: ld r23, Z+ + st X+, r23 + dec r22 + brne 1b + ret + +/******************************************************************************/ +/* +void ubi256_nextBlock(ubi256_ctx_t* ctx, const void* block){ + threefish256_ctx_t tfctx; + ((uint64_t*)(ctx->tweak))[0] += UBI256_BLOCKSIZE_B; + threefish256_init(ctx->g, ctx->tweak, &tfctx); + memcpy(ctx->g, block, UBI256_BLOCKSIZE_B); + threefish256_enc(ctx->g, &tfctx); + memxor(ctx->g, block, UBI256_BLOCKSIZE_B); + ctx->tweak[15] &= (uint8_t)~0x40; +} +*/ +/* + * param ctx: r24:r25 + * param block: r22:r23 + */ +CTX0 = 2 +CTX1 = 3 +BLOCK0 = 4 +BLOCK1 = 5 +TFCTX0 = 6 +TFCTX1 = 7 +.global ubi256_nextBlock +ubi256_nextBlock: + stack_alloc_large 64 + push_range 2, 7 + adiw r30, 1 /* Z points to tfctx */ + movw TFCTX0, r30 + movw CTX0, r24 + movw BLOCK0, r22 + movw r26, r24 +/* add BLOCKSIZE_B (32) to tweak */ + ldi r25, 32 + ld r24, X + add r24, r25 + st X+, r24 + ldi r25, 11 /* the carry is rippled through 11 more tweak bytes; the C reference above only updates a uint64_t */ +1: ld r24, X + adc r24, r1 + st
X+, r24 + dec r25 + brne 1b +/* call threefish256_init */ + movw r24, CTX0 + adiw r24, 16 + movw r22, CTX0 + movw CTX0, r24 /* CTX points to ctx->g */ + movw r20, TFCTX0 + rcall threefish256_init + /* copy block to ctx->g */ + movw r26, CTX0 + movw r30, BLOCK0 + ldi r25, 32 +1: ld r24, Z+ + st X+, r24 + dec r25 + brne 1b +/* call threefish256_enc */ + movw r24, CTX0 + movw r22, TFCTX0 + rcall threefish256_enc +/* xor block into ctx->g */ + movw r26, BLOCK0 + movw r30, CTX0 + ldi r25, 32 +1: ld r24, X+ + ld r23, Z + eor r23, r24 + st Z+, r23 + dec r25 + brne 1b +/* clear 'first' bit in tweak */ + sbiw r30, 33 /* Z ended at ctx->g+32; back up to tweak[15] */ + ld r24, Z + andi r24, ~0x40 + st Z, r24 +exit: + pop_range 2, 7 + stack_free_large 64 + ret + +/******************************************************************************/ +/* +void ubi256_lastBlock(ubi256_ctx_t* ctx, const void* block, uint16_t length_b){ + threefish256_ctx_t tfctx; + while(length_b>UBI256_BLOCKSIZE){ + ubi256_nextBlock(ctx, block); + block = (uint8_t*)block + UBI256_BLOCKSIZE_B; + length_b -= UBI256_BLOCKSIZE; + } + ctx->tweak[15] |= 0x80; + ((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8; + if(length_b & 0x07){ + ctx->tweak[14] |= 0x80; + } + threefish256_init(ctx->g, ctx->tweak, &tfctx); + memset(ctx->g, 0, UBI256_BLOCKSIZE_B); + memcpy(ctx->g, block, (length_b+7)/8); + if(length_b & 0x07){ + ctx->g[((length_b+7)/8)-1] |= 0x80>>(length_b&7); + ctx->g[((length_b+7)/8)-1] &= ~((0x80>>(length_b&7))-1); + } + threefish256_enc(ctx->g, &tfctx); + memxor(ctx->g, block, (length_b+7)/8); + if(length_b & 0x07){ + ctx->g[((length_b+7)/8)-1] ^= 0x80>>(length_b&7); + } +} +*/ +/* + * param ctx: r24:r25 + * param block: r22:r23 + * param length_b: r20:r21 + */ +MASK_B = 8 +LEN_B = 9 +TFCTX0 = 10 +TFCTX1 = 11 +CTX0 = 12 +CTX1 = 13 +BLOCK0 = 14 +BLOCK1 = 15 +LENGTH0 = 16 +LENGTH1 = 17 +.global ubi256_lastBlock +ubi256_lastBlock: +/* run nextBlock for preceding blocks*/ + push_range 8, 17 + movw CTX0, r24 + movw BLOCK0, r22 + movw LENGTH0, r20 +1: cpi
LENGTH1, 2 /* high byte >= 2, i.e. length_b >= 512 bits */ + brlo 2f + movw r24, CTX0 + movw r22, BLOCK0 + rcall ubi256_nextBlock + ldi r25, 32 + add BLOCK0, r25 + adc BLOCK1, r1 + dec LENGTH1 /* length_b -= 256 bits */ + rjmp 1b +2: tst LENGTH1 /* one more full block only if 256 < length_b < 512 */ + breq 3f + tst LENGTH0 + breq 3f + movw r24, CTX0 + movw r22, BLOCK0 + rcall ubi256_nextBlock + ldi r25, 32 + add BLOCK0, r25 + adc BLOCK1, r1 + dec LENGTH1 +3: /* now the real fun */ + stack_alloc_large 64 + adiw r30, 1 + movw TFCTX0, r30 + /* calculate LEN_B */ + movw r24, LENGTH0 + adiw r24, 7 + lsr r25 + ror r24 + lsr r24 + lsr r24 + mov LEN_B, r24 /* LEN_B = (length_b + 7) / 8 */ + /* add length to tweak */ + movw r30, CTX0 + ld r24, Z + add r24, LEN_B + st Z+, r24 + ldi r25, 11 +1: ld r24, Z + adc r24, r1 + st Z+, r24 + dec r25 + brne 1b + /* set 'final' bit*/ + movw r30, CTX0 + ldd r24, Z+15 + ori r24, 0x80 + std Z+15, r24 + /* store in T if we do bit processing and set 'BitPad' bit*/ + clr MASK_B + mov r24, LENGTH0 + andi r24, 0x07 + tst r24 + breq 4f + ldd r25, Z+14 + ori r25, 0x80 + std Z+14, r25 + ldi r25, 0x80 + mov MASK_B, r25 +1: lsr MASK_B /* loop leaves MASK_B = 0x80 >> (length_b & 7) */ + dec r24 + brne 1b +4: /* call threefish256_init*/ + movw r24, CTX0 + adiw r24, 16 + movw r22, CTX0 + movw CTX0, r24 /* CTX points at ctx->g */ + movw r20, TFCTX0 + rcall threefish256_init + /* copy block to ctx->g */ + movw r26, BLOCK0 + movw r30, CTX0 + mov r24, LEN_B + ldi r25, 32 + sub r25, LEN_B + tst r24 +1: breq 2f + ld r22, X+ + st Z+, r22 + dec r24 + rjmp 1b +2: tst MASK_B + breq 29f + or r22, MASK_B /* NOTE(review): the C reference also clears the bits below the pad bit; only the OR is done here - confirm */ + st -Z, r22 + adiw r30, 1 +29: tst r25 /* zero-fill the remaining 32 - LEN_B bytes of ctx->g */ +3: breq 4f + st Z+, r1 + dec r25 + rjmp 3b +4: /* call threefish256_enc */ + movw r24, CTX0 + movw r22, TFCTX0 + rcall threefish256_enc + /* xor block into ctx->g */ + movw r30, CTX0 + movw r26, BLOCK0 + tst LEN_B +5: breq 6f + ld r22, X+ + ld r23, Z + eor r23, r22 + st Z+, r23 + dec LEN_B + rjmp 5b +6: tst MASK_B + breq 7f + eor r23, MASK_B + st -Z, r23 + +7: stack_free_large 64 + pop_range 8, 17 + ret + + -- 2.39.2