#undef UART_LEDS
$debug = false
+require 'rubygems'
require 'serialport'
def init_system
return line
+def send_md(md_string) # write md_string to the global serial port $sp, one character per print call
+ for i in 0..md_string.length-1
+ $sp.print(md_string[i].chr)
+# print(md_string[i].chr)
+ if(i%20==19) # true after every 20th character sent
+ sleep(0.1) # pause 0.1 s between 20-character bursts; presumably so the receiver can keep up -- TODO confirm
+ end
+ end
def run_test(filename)
+ errors = 0
if not File.exist?(filename)
puts("ERROR file "+filename+" does not exist!")
end while not (file.eof or (/[\s]*Msg[\s]*=.*/.match(lb)))
return if file.eof
puts("DBG sending: "+lb) if $debug
- $sp.print(lb.strip)
+ send_md(lb.strip)
avr_md = get_md()
puts("") if (pos%$linewidth==0 and $linewidth!=0)
- putc((a==b)?'*':'!')
+ #putc((a==b)?'*':'!')
+ if(a==b)
+ putc('*')
+ else
+ putc('!')
+ errors += 1;
+ end
pos += 1
+ return errors
if ARGV.size < 6
puts("\nPort: "+ARGV[0]+ "@"+ARGV[1]+" "+ARGV[2]+"N"+ARGV[3]+"\n");
+puts("serial port interface version: " + SerialPort::VERSION);
$linewidth = 64
-$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE);
+$params = { "baud" => ARGV[1].to_i,
+ "data_bits" => ARGV[2].to_i,
+ "stop_bits" => ARGV[3].to_i,
+ "parity" => SerialPort::NONE }
+$sp = SerialPort.new(ARGV[0], $params)
+#$sp = SerialPort.new(ARGV[0], ARGV[1].to_i, ARGV[2].to_i, ARGV[3].to_i, SerialPort::NONE);
$sp.read_timeout=1000; # 5 minutes
+$sp.flow_control = SerialPort::SOFT
$algo_select = ARGV[4]
for i in (5..(ARGV.size-1))
- run_test(ARGV[i])
- puts("")
+ errors = run_test(ARGV[i])
+ if errors == 0
+ puts("[ok]")
+ else
+ puts("[errors: "+errors.to_s+"]")
+ end
# Makefile for Skein
# comment out the following line for removement of Skein from the build process
-$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \
- ubi256.o ubi512.o ubi1024.o memxor.o skein256.o skein512.o skein1024.o
+$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o skein256_asm.o \
+ threefish_mix_4c.o threefish512_enc.o threefish1024_enc.o \
+ ubi512.o ubi1024.o memxor.o skein512.o skein1024.o
$(ALGO_NAME)_TEST_BIN := main-skein-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \
hfal-basic.o hfal_skein256.o hfal_skein512.o hfal_skein1024.o shavs.o
# Makefile for threefish
# comment out the following line for removement of threefish from the build process
-$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o
+$(ALGO_NAME)_OBJ := threefish256_enc_asm.o threefish512_enc.o threefish1024_enc.o\
+ threefish_mix.o threefish_mix_4c.o
$(ALGO_NAME)_TEST_BIN := main-threefish-test.o debug.o uart.o hexdigit_tab.o \
nessie_bc_test.o dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
# Makefile for UBI
# comment out the following line for removement of ubi from the build process
-$(ALGO_NAME)_OBJ := threefish256_enc.o threefish512_enc.o threefish1024_enc.o \
- ubi256.o ubi512.o ubi1024.o memxor.o
+$(ALGO_NAME)_OBJ := threefish_mix.o threefish256_enc_asm.o ubi256_asm.o threefish512_enc.o threefish1024_enc.o \
+ threefish_mix_4c.o ubi512.o ubi1024.o memxor.o
$(ALGO_NAME)_TEST_BIN := main-ubi-test.o debug.o uart.o hexdigit_tab.o \
dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o
$(ALGO_NAME)_NESSIE_TEST := test nessie
--- /dev/null
+/* skein256_asm.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){
+ skein_config_t conf;
+ uint8_t null[UBI256_BLOCKSIZE_B];
+ memset(null, 0, UBI256_BLOCKSIZE_B);
+ memset(&conf, 0, sizeof(skein_config_t));
+ conf.schema[0] = 'S';
+ conf.schema[1] = 'H';
+ conf.schema[2] = 'A';
+ conf.schema[3] = '3';
+ conf.version = 1;
+ conf.out_length = outsize_b;
+ ctx->outsize_b = outsize_b;
+ ubi256_init(&(ctx->ubictx), null, UBI_TYPE_CFG);
+ ubi256_lastBlock(&(ctx->ubictx), &conf, 256);
+ ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_MSG);
+ * param ctx: r24:r25
+ * param outsize_b: r22:r23
+ */
+UBICTX0 = 2
+UBICTX1 = 3
+CONF0 = 4
+CONF1 = 5
+.global skein256_init /* skein256_init(ctx: r24:r25, outsize_b: r22:r23), see the C reference above */
+ push_range 2, 5 /* r2..r5 hold UBICTX and CONF across the calls below */
+ stack_alloc 64-22 /* 42 bytes of scratch; the code below assumes the macro leaves Z just below the buffer -- TODO confirm in avr-asm-macros.S */
+ adiw r30, 1
+ movw CONF0, r30 /* CONF = first byte of the scratch buffer */
+ movw r26, r24
+ st X+, r22 /* ctx->outsize_b (low byte), stored at ctx offset 0 */
+ st X+, r23 /* ctx->outsize_b (high byte) */
+ movw UBICTX0, r26 /* UBICTX = ctx + 2, i.e. &ctx->ubictx */
+ ldi r24, 'S'
+ st Z+, r24
+ ldi r24, 'H'
+ st Z+, r24
+ ldi r24, 'A'
+ st Z+, r24
+ ldi r24, '3'
+ st Z+, r24
+ ldi r24, 1 /* conf.version = 1 */
+ st Z+, r24
+ st Z+, r1 /* three zero bytes follow the version byte (r1 is expected to hold 0) */
+ st Z+, r1
+ st Z+, r1
+ st Z+, r22 /* conf.out_length, low two bytes */
+ st Z+, r23
+ ldi 24, 22+10 /* 32 zero bytes: 22 complete the 32-byte conf, 10 more extend the buffer to 42 bytes */
+1: st Z+, r1
+ dec r24
+ brne 1b
+ /* call ubi256_init(UBICTX, null, 4); the C reference names the type UBI_TYPE_CFG */
+ sbiw r30, 32 /* Z -> last 32 bytes of the buffer, all zero: serves as the null block */
+ movw r24, UBICTX0
+ movw r22, r30
+ ldi r20, 4
+ rcall ubi256_init
+ /* call ubi256_lastBlock(UBICTX, conf, 256) */
+ movw r24, UBICTX0
+ movw r22, CONF0
+ ldi r21, 1 /* r21:r20 = 0x0100 = 256 bits */
+ clr r20
+ rcall ubi256_lastBlock
+ /* call ubi256_init(UBICTX, UBICTX+16, 48); the C reference passes ctx->ubictx.g and UBI_TYPE_MSG */
+ movw r24, UBICTX0
+ adiw r24, 16
+ movw r22, r24
+ movw r24, UBICTX0
+ ldi r20, 48
+ rcall ubi256_init
+ stack_free 64-22
+ pop_range 2, 5
+ ret
+.global skein256_nextBlock
+ adiw r24, 2
+ rjmp ubi256_nextBlock
+.global skein256_lastBlock
+ adiw r24, 2
+ rjmp ubi256_lastBlock
+void skein256_ctx2hash(void* dest, skein256_ctx_t* ctx){
+ ubi256_ctx_t uctx;
+ uint16_t outsize_b;
+ uint64_t counter=0;
+ uint8_t outbuffer[UBI256_BLOCKSIZE_B];
+ ubi256_init(&(ctx->ubictx), ctx->ubictx.g, UBI_TYPE_OUT);
+ outsize_b = ctx->outsize_b;
+ while(1){
+ memcpy(&uctx, &(ctx->ubictx), sizeof(ubi256_ctx_t));
+ ubi256_lastBlock(&uctx, &counter, 64);
+ ubi256_ctx2hash(outbuffer, &uctx);
+ if(outsize_b<=UBI256_BLOCKSIZE){
+ memcpy(dest, outbuffer, (outsize_b+7)/8);
+ break;
+ }else{
+ memcpy(dest, outbuffer, UBI256_BLOCKSIZE_B);
+ dest = (uint8_t*)dest + UBI256_BLOCKSIZE_B;
+ outsize_b -= UBI256_BLOCKSIZE;
+ counter++;
+ }
+ }
+ * param dest: r24:r25
+ * param ctx: r22:r23
+ */
+ OUTSIZE_B0 = 16
+ OUTSIZE_B1 = 17
+ UCTX0 = 14
+ UCTX1 = 15
+ UBICTX0 = 12
+ UBICTX1 = 13
+ DEST0 = 10
+ DEST1 = 11
+.global skein256_ctx2hash
+skein256_ctx2hash:
+ /*
+  * skein256_ctx2hash(dest: r24:r25, ctx: r22:r23)
+  *
+  * Output stage, following the C reference above: switch ctx->ubictx to
+  * the OUT type, then for counter = 0, 1, ... run ubi256_lastBlock on a
+  * copy of the context with the 64-bit counter as message and copy the
+  * resulting chaining value to dest, 32 bytes per round, until outsize_b
+  * bits have been written.
+  *
+  * Stack frame (88 bytes), addressed relative to UCTX:
+  *   UCTX +  0   uctx      (48 bytes, working copy of ctx->ubictx)
+  *   UCTX + 48   counter   ( 8 bytes)
+  *   UCTX + 56   outbuffer (32 bytes)
+  */
+ push_range 10, 17
+ stack_alloc_large 88 /* uctx || counter || outbuffer */
+ movw DEST0, r24
+ adiw r30, 1
+ movw UCTX0, r30
+ /* counter = 0 (r1 is expected to hold zero) */
+ adiw r30, 48
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ st Z+, r1
+ /* outsize_b = ctx->outsize_b; UBICTX = &ctx->ubictx (ctx + 2) */
+ movw r26, r22
+ ld OUTSIZE_B0, X+
+ ld OUTSIZE_B1, X+
+ movw UBICTX0, r26
+ /* call ubi256_init(UBICTX, UBICTX+16, 63); UBI_TYPE_OUT in the C reference */
+ movw r24, UBICTX0
+ adiw r24, 16
+ movw r22, r24
+ movw r24, UBICTX0
+ ldi r20, 63
+ rcall ubi256_init
+ /* main loop */
+1:
+ /* copy ubictx in uctx */
+ movw r30, UCTX0
+ movw r26, UBICTX0
+ ldi r24, 48
+2: ld r25, X+
+ st Z+, r25
+ dec r24
+ brne 2b
+ /* call ubi256_lastBlock(uctx, &counter, 64) */
+ movw r24, UCTX0
+ adiw r24, 48
+ movw r22, r24
+ movw r24, UCTX0
+ clr r21
+ ldi r20, 64
+ rcall ubi256_lastBlock
+ /* copy uctx->g to outbuffer */
+ movw r26, UCTX0
+ adiw r26, 16
+ movw r30, UCTX0
+ adiw r30, 56
+ ldi r24, 32
+2: ld r25, X+
+ st Z+, r25
+ dec r24
+ brne 2b
+ /*
+  * compare outsize_b with 256: anything <= 256 is the last iteration.
+  * outsize_b is unsigned, so the high byte is tested with brsh; a signed
+  * brge would misclassify sizes with a high byte >= 0x80.
+  */
+ cpi OUTSIZE_B1, 2
+ brsh 5f
+ cpi OUTSIZE_B1, 1
+ brlo 3f
+ tst OUTSIZE_B0
+ breq 3f
+5: /* copy outbuffer to dest */
+ movw r30, DEST0
+ movw r26, UCTX0
+ adiw r26, 56
+ ldi r24, 32
+6: ld r25, X+
+ st Z+, r25
+ dec r24
+ brne 6b
+ /*
+  * store new dest: Z is the write pointer and now equals dest + 32.
+  * (X is the read pointer into outbuffer and must not be used here.)
+  */
+ movw DEST0, r30
+ /* adjust counter and outsize_b: outsize_b -= 256, counter += 1 */
+ dec OUTSIZE_B1
+ movw r30, UCTX0
+ adiw r30, 48
+ ldi r24, 1
+ ld r25, Z
+ add r25, r24
+ st Z+, r25
+ /* propagate the carry through the remaining 7 counter bytes; dec leaves C untouched */
+ ldi r24, 7
+6: ld r25, Z
+ adc r25, r1
+ st Z+, r25
+ dec r24
+ brne 6b
+ rjmp 1b
+3: /* last iteration: copy (outsize_b + 7) / 8 bytes */
+ movw r24, OUTSIZE_B0
+ adiw r24, 7
+ lsr r25
+ ror r24
+ lsr r24
+ lsr r24
+ movw r30, DEST0
+ movw r26, UCTX0
+ adiw r26, 56
+ tst r24
+ breq 8f
+7: ld r25, X+
+ st Z+, r25
+ dec r24
+ brne 7b
+8:
+ stack_free_large 88
+ pop_range 10, 17
+ ret
+void skein256(void* dest, uint16_t outlength_b, const void* msg, uint32_t length_b){
+ skein256_ctx_t ctx;
+ skein256_init(&ctx, outlength_b);
+ while(length_b>SKEIN256_BLOCKSIZE){
+ skein256_nextBlock(&ctx, msg);
+ msg = (uint8_t*)msg + SKEIN256_BLOCKSIZE_B;
+ length_b -= SKEIN256_BLOCKSIZE;
+ }
+ skein256_lastBlock(&ctx, msg, length_b);
+ skein256_ctx2hash(dest, &ctx);
+ * param dest: r24:r25
+ * param outlength_b: r22:r23
+ * param msg: r20:r21
+ * param length_b: r16:r19
+ */
+LENGTH_B0 = 2
+LENGTH_B1 = 3
+LENGTH_B2 = 4
+LENGTH_B3 = 5
+DEST0 = 6
+DEST1 = 7
+MSG0 = 8
+MSG1 = 9
+CTX0 = 10
+CTX1 = 11
+.global skein256 /* one-shot hash: skein256(dest, outlength_b, msg, length_b), see the C reference above */
+ push_range 2, 11
+ stack_alloc 50 /* room for ctx: 2 bytes outsize_b followed by the 48 bytes that skein256_ctx2hash copies as ubictx */
+ adiw r30, 1
+ movw CTX0, r30
+ movw DEST0, r24
+ movw MSG0, r20
+ movw LENGTH_B0, r16 /* length_b bytes 0..1 */
+ movw LENGTH_B2, r18 /* length_b bytes 2..3 */
+ /* call skein256_init(ctx, outlength_b); outlength_b is still in r22:r23 */
+ movw r24, r30
+ rcall skein256_init
+1: tst LENGTH_B2 /* only the upper 16 bits of length_b are tested here */
+ brne 4f
+ tst LENGTH_B3
+ brne 4f
+ /* call skein256_lastBlock with the remaining (16-bit) length */
+ /* NOTE(review): the C reference loops while length_b > 256; here up to 0xFFFF bits reach skein256_lastBlock -- confirm ubi256_lastBlock consumes multi-block input */
+ movw r24, CTX0
+ movw r22, MSG0
+ movw r20, LENGTH_B0
+ rcall skein256_lastBlock
+ /* call skein256_ctx2hash(dest, ctx) */
+ movw r24, DEST0
+ movw r22, CTX0
+ rcall skein256_ctx2hash
+ /* return */
+ stack_free 50
+ pop_range 2, 11
+ ret
+4: /* process preceding blocks */
+ movw r24, CTX0
+ movw r22, MSG0
+ rcall skein256_nextBlock
+ movw r24, MSG0 /* msg += 32 bytes (one block) */
+ adiw r24, 32
+ movw MSG0, r24
+ mov r24, LENGTH_B1 /* length_b -= 256: subtract 1 from bytes 1..3, byte 0 is unchanged */
+ mov r25, LENGTH_B2
+ sbiw r24, 1
+ sbc LENGTH_B3, r1 /* borrow into the top byte (r1 is expected to hold 0) */
+ mov LENGTH_B1, r24
+ mov LENGTH_B2, r25
+ rjmp 1b
message[i] = 0xFF-i;
cli_putstr_P(PSTR("\r\nmessage: "));
+ cli_hexdump(message, 1);
skein256(hash, outsize_b, message, 8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
+ cli_hexdump_block(message, 32, 4, 16);
skein256(hash, outsize_b, message, 32*8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
+ cli_hexdump_block(message, 64, 4, 16);
skein256(hash, outsize_b, message, 64*8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
+ cli_hexdump_block(message, 64, 4, 16);
skein512(hash, outsize_b, message, 64*8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
+ cli_hexdump_block(message, 128, 4, 16);
skein512(hash, outsize_b, message, 128*8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
+ cli_hexdump_block(message, 128, 4, 16);
skein1024(hash, outsize_b, message, 128*8);
cli_hexdump_block(hash, (outsize_b+7)/8, 4, 16);
cli_putstr_P(PSTR("\r\n\r\nCrypto-VS ("));
+ cli_putstr_P(PSTR("; "));
+ cli_putstr(__DATE__);
+ cli_putstr_P(PSTR(" "));
+ cli_putstr(__TIME__);
cli_putstr_P(PSTR(")\r\nloaded and running\r\n"));
threefish256_enc(data, &ctx);
cli_putstr_P(PSTR("\r\ncipher: "));
cli_hexdump(data, 32);
+ /*
+ cli_hexdump_rev(data, 8);
+ cli_putc(' ');
+ cli_hexdump_rev(data+8, 8);
+ cli_putc(' ');
+ cli_hexdump_rev(data+16, 8);
+ cli_putc(' ');
+ cli_hexdump_rev(data+24, 8);
+ cli_putc(' ');
+ */
+ /* second test */
for(i=0; i<32; ++i){
key[i] = 0x10+i;
data[i] = 0xFF-i;
+void init_test(void){ /* CLI test: run threefish256_init on an all-zero key and tweak and dump the resulting context */
+ threefish256_ctx_t ctx;
+ uint8_t key[32], tweak[16];
+ memset(key, 0,32);
+ memset(tweak, 0,16);
+ threefish256_init(key, tweak, &ctx);
+ cli_putstr_P(PSTR("\r\n ctx: \r\n\tk:"));
+ cli_hexdump(ctx.k, 5*8); /* 40 bytes: the C reference for threefish256_init fills k[0..4] */
+ cli_putstr_P(PSTR("\r\n\tt:"));
+ cli_hexdump(ctx.t, 3*8); /* 24 bytes: the C reference for threefish256_init fills t[0..2] */
* main *
const char nessie_str[] PROGMEM = "nessie";
const char test_str[] PROGMEM = "test";
+const char inittest_str[] PROGMEM = "inittest";
const char performance_str[] PROGMEM = "performance";
const char echo_str[] PROGMEM = "echo";
cmdlist_entry_t cmdlist[] PROGMEM = {
// { nessie_str, NULL, testrun_nessie_noekeon},
{ test_str, NULL, testrun_stdtest_threefish},
+ { inittest_str, NULL, init_test},
{ performance_str, NULL, testrun_performance_threefish},
{ echo_str, (void*)1, (void_fpt)echo_ctrl},
} threefish1024_ctx_t;
+void threefish_mix(void* data, uint8_t rot);
+void threefish256_init_c(void* key, void* tweak, threefish256_ctx_t* ctx);
void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx);
void threefish512_init(void* key, void* tweak, threefish512_ctx_t* ctx);
#include <string.h>
#include "threefish.h"
-#define X0 (((uint64_t*)data)[0])
-#define X1 (((uint64_t*)data)[1])
-void mix(void* data, uint8_t rot){
- uint64_t x;
- x = X1;
- X0 += x;
- X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
#define X(a) (((uint64_t*)data)[(a)])
add_key_16(data, ctx, s);
- mix((uint8_t*)data + 0, r0[i%8]);
- mix((uint8_t*)data + 16, r1[i%8]);
- mix((uint8_t*)data + 32, r2[i%8]);
- mix((uint8_t*)data + 48, r3[i%8]);
- mix((uint8_t*)data + 64, r4[i%8]);
- mix((uint8_t*)data + 80, r5[i%8]);
- mix((uint8_t*)data + 96, r6[i%8]);
- mix((uint8_t*)data +112, r7[i%8]);
+ threefish_mix((uint8_t*)data + 0, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
+ threefish_mix((uint8_t*)data + 32, r2[i%8]);
+ threefish_mix((uint8_t*)data + 48, r3[i%8]);
+ threefish_mix((uint8_t*)data + 64, r4[i%8]);
+ threefish_mix((uint8_t*)data + 80, r5[i%8]);
+ threefish_mix((uint8_t*)data + 96, r6[i%8]);
+ threefish_mix((uint8_t*)data +112, r7[i%8]);
#include <string.h>
#include "threefish.h"
-#define X0 (((uint64_t*)data)[0])
-#define X1 (((uint64_t*)data)[1])
-void mix(void* data, uint8_t rot){
- uint64_t x;
- x = X1;
- X0 += x;
- X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
#define X(a) (((uint64_t*)data)[(a)])
void permute_4(void* data){
add_key_4(data, ctx, s);
- mix(data, r0[i%8]);
- mix((uint8_t*)data + 16, r1[i%8]);
+ threefish_mix(data, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
--- /dev/null
+/* threefish_mix.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+A0 = 14
+A1 = 15
+A2 = 16
+A3 = 17
+A4 = 18
+A5 = 19
+A6 = 20
+A7 = 21
+#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * /
+#define K(s) (((uint64_t*)key)[(s)])
+#define T(s) (((uint64_t*)tweak)[(s)])
+void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){
+ memcpy(ctx->k, key, 4*8);
+ memcpy(ctx->t, tweak, 2*8);
+ uint8_t i;
+ ctx->k[4] = THREEFISH_KEY_CONST;
+ for(i=0; i<4; ++i){
+ ctx->k[4] ^= K(i);
+ }
+ ctx->t[2] = T(0) ^ T(1);
+ * param key: r24:r25
+ * param tweak: r22:r23
+ * param ctx: r20:r21
+ */
+.global threefish256_init
+ push_range 14, 17
+ movw r30, r20
+ movw r26, r24
+ ldi r24, 4
+ ldi A7, 0x55
+ mov A6, A7
+ movw A4, A6
+ movw A2, A6
+ movw A0, A6
+ ld r0, X+
+ st Z+, r0
+ eor A0, r0
+ ld r0, X+
+ st Z+, r0
+ eor A1, r0
+ ld r0, X+
+ st Z+, r0
+ eor A2, r0
+ ld r0, X+
+ st Z+, r0
+ eor A3, r0
+ ld r0, X+
+ st Z+, r0
+ eor A4, r0
+ ld r0, X+
+ st Z+, r0
+ eor A5, r0
+ ld r0, X+
+ st Z+, r0
+ eor A6, r0
+ ld r0, X+
+ st Z+, r0
+ eor A7, r0
+ dec r24
+ brne 1b
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ /* now the tweak */
+ movw r26, r22
+ ld A0, X+
+ ld A1, X+
+ ld A2, X+
+ ld A3, X+
+ ld A4, X+
+ ld A5, X+
+ ld A6, X+
+ ld A7, X+
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ ld r0, X+
+ eor A0, r0
+ st Z+, r0
+ ld r0, X+
+ eor A1, r0
+ st Z+, r0
+ ld r0, X+
+ eor A2, r0
+ st Z+, r0
+ ld r0, X+
+ eor A3, r0
+ st Z+, r0
+ ld r0, X+
+ eor A4, r0
+ st Z+, r0
+ ld r0, X+
+ eor A5, r0
+ st Z+, r0
+ ld r0, X+
+ eor A6, r0
+ st Z+, r0
+ ld r0, X+
+ eor A7, r0
+ st Z+, r0
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ pop_range 14, 17
+ ret
+#define X(a) (((uint64_t*)data)[(a)])
+void permute_4(void* data){
+ uint64_t t;
+ t = X(1);
+ X(1) = X(3);
+ X(3) = t;
+void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * /
+ X(0) += ctx->k[(s+0)%5];
+ X(1) += ctx->k[(s+1)%5] + ctx->t[s%3];
+ X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
+ X(3) += ctx->k[(s+3)%5] + s;
+void threefish256_enc(void* data, threefish256_ctx_t* ctx){
+ uint8_t i=0,s=0;
+ uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59};
+ uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50};
+ do{
+ if(i%4==0){
+ add_key_4(data, ctx, s);
+ ++s;
+ }
+ threefish_mix(data, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
+ permute_4(data);
+ ++i;
+ }while(i!=72);
+ add_key_4(data, ctx, s);
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+ * param data: r24:r25
+ * param ctx: r22:r23
+ */
+.global threefish256_enc
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ clr S
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+ ldi r30, lo8(threefish256_slut5)
+ ldi r31, hi8(threefish256_slut5)
+ mov r26, S
+ add r30, r26
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall add_z_to_x8
+ /* now the remaining key */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish256_slut3)
+ ldi r31, hi8(threefish256_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ adiw r30, 5*8
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ ld r0, X
+ add r0, S
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ inc S
+ mov r26, S
+ cpi r26, 19
+ brmi 4f
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+ /* call mix */
+ ldi r30, lo8(threefish256_rc0)
+ ldi r31, hi8(threefish256_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ movw r24, DATA0
+ call threefish_mix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_mix_asm /* no rcall? */
+ /* now the permutation */
+ movw r26, DATA0
+ adiw r26, 8
+ movw r30, r26
+ adiw r30, 16
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ inc I
+ rjmp 1b
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
+ .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+;threefish256_rc0: .byte 5, 36, 13, 58, 26, 53, 11, 59
+;threefish256_rc1: .byte 56, 28, 46, 44, 20, 35, 42, 50
+threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73
+threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62
+ ld r0, Z+
+ ld r1, X
+ add r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ clr r1
+ ret
--- /dev/null
+/* threefish_mix.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+A0 = 14
+A1 = 15
+A2 = 16
+A3 = 17
+A4 = 18
+A5 = 19
+A6 = 20
+A7 = 21
+#define THREEFISH_KEY_CONST 0x5555.5555.5555.5555.LL / * 2**64/3 * /
+#define K(s) (((uint64_t*)key)[(s)])
+#define T(s) (((uint64_t*)tweak)[(s)])
+void threefish256_init(void* key, void* tweak, threefish256_ctx_t* ctx){
+ memcpy(ctx->k, key, 4*8);
+ memcpy(ctx->t, tweak, 2*8);
+ uint8_t i;
+ ctx->k[4] = THREEFISH_KEY_CONST;
+ for(i=0; i<4; ++i){
+ ctx->k[4] ^= K(i);
+ }
+ ctx->t[2] = T(0) ^ T(1);
+ * param key: r24:r25
+ * param tweak: r22:r23
+ * param ctx: r20:r21
+ */
+.global threefish256_init
+ push_range 14, 17
+ movw r30, r20
+ movw r26, r24
+ ldi r24, 4
+ ldi A7, 0x55
+ mov A6, A7
+ movw A4, A6
+ movw A2, A6
+ movw A0, A6
+ ld r0, X+
+ st Z+, r0
+ eor A0, r0
+ ld r0, X+
+ st Z+, r0
+ eor A1, r0
+ ld r0, X+
+ st Z+, r0
+ eor A2, r0
+ ld r0, X+
+ st Z+, r0
+ eor A3, r0
+ ld r0, X+
+ st Z+, r0
+ eor A4, r0
+ ld r0, X+
+ st Z+, r0
+ eor A5, r0
+ ld r0, X+
+ st Z+, r0
+ eor A6, r0
+ ld r0, X+
+ st Z+, r0
+ eor A7, r0
+ dec r24
+ brne 1b
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ /* now the tweak */
+ movw r26, r22
+ ld A0, X+
+ ld A1, X+
+ ld A2, X+
+ ld A3, X+
+ ld A4, X+
+ ld A5, X+
+ ld A6, X+
+ ld A7, X+
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ ld r0, X+
+ eor A0, r0
+ st Z+, r0
+ ld r0, X+
+ eor A1, r0
+ st Z+, r0
+ ld r0, X+
+ eor A2, r0
+ st Z+, r0
+ ld r0, X+
+ eor A3, r0
+ st Z+, r0
+ ld r0, X+
+ eor A4, r0
+ st Z+, r0
+ ld r0, X+
+ eor A5, r0
+ st Z+, r0
+ ld r0, X+
+ eor A6, r0
+ st Z+, r0
+ ld r0, X+
+ eor A7, r0
+ st Z+, r0
+ st Z+, A0
+ st Z+, A1
+ st Z+, A2
+ st Z+, A3
+ st Z+, A4
+ st Z+, A5
+ st Z+, A6
+ st Z+, A7
+ pop_range 14, 17
+ ret
+#define X(a) (((uint64_t*)data)[(a)])
+void permute_4(void* data){
+ uint64_t t;
+ t = X(1);
+ X(1) = X(3);
+ X(3) = t;
+void add_key_4(void* data, threefish256_ctx_t* ctx, uint8_t s){ /* s: 0..19 * /
+ X(0) += ctx->k[(s+0)%5];
+ X(1) += ctx->k[(s+1)%5] + ctx->t[s%3];
+ X(2) += ctx->k[(s+2)%5] + ctx->t[(s+1)%3];
+ X(3) += ctx->k[(s+3)%5] + s;
+void threefish256_enc(void* data, threefish256_ctx_t* ctx){
+ uint8_t i=0,s=0;
+ uint8_t r0[8] = { 5, 36, 13, 58, 26, 53, 11, 59};
+ uint8_t r1[8] = {56, 28, 46, 44, 20, 35, 42, 50};
+ do{
+ if(i%4==0){
+ add_key_4(data, ctx, s);
+ ++s;
+ }
+ threefish_mix(data, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
+ permute_4(data);
+ ++i;
+ }while(i!=72);
+ add_key_4(data, ctx, s);
+I = 2
+S = 3
+DATA0 = 4
+DATA1 = 5
+CTX0 = 6
+CTX1 = 7
+IDX0 = 8
+IDX1 = 9
+IDX2 = 10
+IDX3 = 11
+ * param data: r24:r25
+ * param ctx: r22:r23
+ */
+.global threefish256_enc
+ push r28
+ push r29
+ push_range 2, 17
+ movw DATA0, r24
+ movw CTX0, r22
+ clr I
+ clr S
+ mov r30, I
+ andi r30, 0x03
+ breq 2f
+ rjmp 4f
+ ldi r30, lo8(threefish256_slut5)
+ ldi r31, hi8(threefish256_slut5)
+ mov r26, S
+ add r30, r26
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z+
+ lpm IDX2, Z+
+ lpm IDX3, Z
+ movw r30, CTX0
+ movw r26, DATA0
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX2
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, CTX0
+ add r30, IDX3
+ adc r31, r1
+ rcall add_z_to_x8
+ /* now the remaining key */
+ sbiw r26, 3*8
+ ldi r30, lo8(threefish256_slut3)
+ ldi r31, hi8(threefish256_slut3)
+ add r30, S
+ adc r31, r1
+ lpm IDX0, Z+
+ lpm IDX1, Z
+ movw r30, CTX0
+ adiw r30, 5*8
+ movw IDX2, r30
+ add r30, IDX0
+ adc r31, r1
+ rcall add_z_to_x8
+ movw r30, IDX2
+ add r30, IDX1
+ adc r31, r1
+ rcall add_z_to_x8
+ ld r0, X
+ add r0, S
+ st X+, r0
+ ldi r16, 7
+ ld r0, X
+ adc r0, r1
+ st X+, r0
+ dec r16
+ brne 3b
+ inc S
+ mov r26, S
+ cpi r26, 19
+ brmi 4f
+ pop_range 2, 17
+ pop r29
+ pop r28
+ ret
+ /* call mix */
+ ldi r30, lo8(threefish256_rc0)
+ ldi r31, hi8(threefish256_rc0)
+ mov r26, I
+ andi r26, 0x07
+ add r30, r26
+ adc r31, r1
+ lpm r22, Z
+ adiw r30, 8
+ lpm IDX0, Z
+ movw r24, DATA0
+ call threefish_mix_asm /* no rcall? */
+ movw r24, DATA0
+ adiw r24, 16
+ mov r22, IDX0
+ call threefish_mix_asm /* no rcall? */
+ /* now the permutation */
+ movw r26, DATA0
+ adiw r26, 8
+ movw r30, r26
+ adiw r30, 16
+ ldi r16, 8
+3: ld IDX0, X
+ ld IDX1, Z
+ st X+, IDX1
+ st Z+, IDX0
+ dec r16
+ brne 3b
+ inc I
+ rjmp 1b
+ .byte 0x00, 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x18, 0x20, 0x00, 0x08, 0x10, 0x18, 0x20, 0x00
+ .byte 0x08, 0x10, 0x18, 0x20, 0x00, 0x08, 0x10
+ .byte 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+ .byte 0x10, 0x00, 0x08, 0x10, 0x00, 0x08, 0x10, 0x00
+ .byte 0x08, 0x10, 0x00, 0x08, 0x10, 0x00, 0x08
+threefish256_rc0: .byte 0x1b, 0x44, 0x2b, 0x72, 0x32, 0x7b, 0x13, 0x73
+threefish256_rc1: .byte 0x70, 0x34, 0x6a, 0x54, 0x24, 0x43, 0x52, 0x62
+ ld r0, Z+
+ ld r1, X
+ add r1, r0
+ st X+, r1
+ ldi r16, 7
+ ld r0, Z+
+ ld r1, X
+ adc r1, r0
+ st X+, r1
+ dec r16
+ brne 1b
+ clr r1
+ ret
#include <string.h>
#include "threefish.h"
-#define X0 (((uint64_t*)data)[0])
-#define X1 (((uint64_t*)data)[1])
-void mix(void* data, uint8_t rot){
- uint64_t x;
- x = X1;
- X0 += x;
- X1 = ((x<<rot)|(x>>(64-rot))) ^ X0;
#define X(a) (((uint64_t*)data)[(a)])
void permute_8(void* data){
uint64_t t;
add_key_8(data, ctx, s);
- mix((uint8_t*)data + 0, r0[i%8]);
- mix((uint8_t*)data + 16, r1[i%8]);
- mix((uint8_t*)data + 32, r2[i%8]);
- mix((uint8_t*)data + 48, r3[i%8]);
+ threefish_mix((uint8_t*)data + 0, r0[i%8]);
+ threefish_mix((uint8_t*)data + 16, r1[i%8]);
+ threefish_mix((uint8_t*)data + 32, r2[i%8]);
+ threefish_mix((uint8_t*)data + 48, r3[i%8]);
--- /dev/null
+/* threefish_mix.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+#define B0 (((uint64_t*)data)[0])
+#define B1 (((uint64_t*)data)[1])
+void mix(void* data, uint8_t rot){
+ uint64_t x;
+ x = B1;
+ B0 += x;
+ B1 = ((x<<rot)|(x>>(64-rot))) ^ B0;
+A0 = 10
+A1 = 11
+A2 = 12
+A3 = 13
+A4 = 14
+A5 = 15
+A6 = 16
+A7 = 17
+B0 = 18
+B1 = 19
+B2 = 20
+B3 = 21
+B4 = 22
+B5 = 23
+B6 = 24
+B7 = 25
+vROT = 27
+ * param data: r24:r25
+ * param rot: r22
+ */
+/* threefish_mix_asm: Threefish MIX step on the two 64-bit words at `data`:
+ *   w0 += w1;  w1 = rotl(w1, rot) ^ w0
+ * In: r24:r25 = data pointer, r22 = encoded rotation
+ *     (bits 4..6: whole bytes to rotate left, bit 3: direction of the
+ *      remaining bit rotation, set = right, bits 0..2: bit count).
+ * Nothing is saved or restored in this routine: Y, Z, r26, vROT and
+ * A0..B7 (r10..r25) are overwritten; the rotation helpers it jumps to
+ * also use r0.
+ * The entry label and the post_byterot / post_bitrot labels are not visible
+ * in this excerpt; their presumed positions are marked below.
+ */
+.global threefish_mix_asm
+ movw r28, r24 /* Y = data; r24/r25 are reused as B6/B7 below */
+ mov vROT,r22
+ ldd A0, Y+ 0 /* A0..A7 = first word, lowest address in A0 */
+ ldd A1, Y+ 1
+ ldd A2, Y+ 2
+ ldd A3, Y+ 3
+ ldd A4, Y+ 4
+ ldd A5, Y+ 5
+ ldd A6, Y+ 6
+ ldd A7, Y+ 7
+ ldd B0, Y+ 8 /* B0..B7 = second word */
+ ldd B1, Y+ 9
+ ldd B2, Y+10
+ ldd B3, Y+11
+ ldd B4, Y+12
+ ldd B5, Y+13
+ ldd B6, Y+14
+ ldd B7, Y+15
+ add A0, B0 /* A += B, 64-bit add with carry chain */
+ adc A1, B1
+ adc A2, B2
+ adc A3, B3
+ adc A4, B4
+ adc A5, B5
+ adc A6, B6
+ adc A7, B7
+ mov r26, vROT
+ swap r26 /* bring the high nibble (byte count) down */
+ andi r26, 0x07 /* index 0..7 into the jump table */
+ ldi r30, pm_lo8(byte_rot_jmptable)
+ ldi r31, pm_hi8(byte_rot_jmptable)
+ add r30, r26
+ adc r31, r1 /* relies on r1 == 0 */
+ ijmp /* each table entry ends up at post_byterot */
+/* presumably post_byterot: whole-byte rotation done, now the 0..7 bit part */
+ bst vROT, 3 /* T = direction flag (set = rotate right) */
+ andi vROT, 0x07 /* keep only the bit count; andi leaves T alone */
+ brts 1f
+ rjmp bit_rotl
+1: rjmp bit_rotr
+/* presumably post_bitrot: B now holds the rotated word */
+ eor B0, A0 /* B ^= A */
+ eor B1, A1
+ eor B2, A2
+ eor B3, A3
+ eor B4, A4
+ eor B5, A5
+ eor B6, A6
+ eor B7, A7
+ std Y+ 0, A0 /* write both words back to data */
+ std Y+ 1, A1
+ std Y+ 2, A2
+ std Y+ 3, A3
+ std Y+ 4, A4
+ std Y+ 5, A5
+ std Y+ 6, A6
+ std Y+ 7, A7
+ std Y+ 8, B0
+ std Y+ 9, B1
+ std Y+10, B2
+ std Y+11, B3
+ std Y+12, B4
+ std Y+13, B5
+ std Y+14, B6
+ std Y+15, B7
+ ret
+/* Dispatch table reached via ijmp (its label, presumably byte_rot_jmptable,
+ * is not visible in this excerpt). Entry k performs a left rotation by k
+ * bytes, implemented as a right rotation by 8-k bytes; entries 0 and 8 are
+ * no-ops. The caller in this file masks the index to 0..7, so entry 8 is
+ * never selected here.
+ */
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+ rjmp byte_rotr_7
+ rjmp byte_rotr_6
+ rjmp byte_rotr_5
+ rjmp byte_rotr_4
+ rjmp byte_rotr_3
+ rjmp byte_rotr_2
+ rjmp byte_rotr_1
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+/* byte_rotr_N: rotate the 64-bit value in B0..B7 right by N whole bytes,
+ * i.e. new B[i] = old B[(i + N) mod 8], using r0 as the only temporary.
+ * The two-line comment before each routine shows the register indices before
+ * (top row) and the source index that lands in each position (bottom row).
+ * Every routine ends by jumping to post_byterot.
+ */
+; 0 1 2 3 4 5 6 7
+; 1 2 3 4 5 6 7 0
+byte_rotr_1: /* 10 words */
+ mov r0, B0
+ mov B0, B1
+ mov B1, B2
+ mov B2, B3
+ mov B3, B4
+ mov B4, B5
+ mov B5, B6
+ mov B6, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 2 3 4 5 6 7 0 1
+byte_rotr_2: /* 11 words */
+ mov r0, B0 /* even positions form one cycle ... */
+ mov B0, B2
+ mov B2, B4
+ mov B4, B6
+ mov B6, r0
+ mov r0, B1 /* ... odd positions the other */
+ mov B1, B3
+ mov B3, B5
+ mov B5, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 3 4 5 6 7 0 1 2
+byte_rotr_3: /* 10 words */
+ mov r0, B0 /* single 8-element cycle, walked in steps of 3 */
+ mov B0, B3
+ mov B3, B6
+ mov B6, B1
+ mov B1, B4
+ mov B4, B7
+ mov B7, B2
+ mov B2, B5
+ mov B5, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 4 5 6 7 0 1 2 3
+byte_rotr_4: /* 13 words */
+ mov r0, B0 /* four independent swaps B[i] <-> B[i+4] */
+ mov B0, B4
+ mov B4, r0
+ mov r0, B1
+ mov B1, B5
+ mov B5, r0
+ mov r0, B2
+ mov B2, B6
+ mov B6, r0
+ mov r0, B3
+ mov B3, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 5 6 7 0 1 2 3 4
+byte_rotr_5: /* 10 words */
+ mov r0, B0 /* single 8-element cycle, walked in steps of 5 */
+ mov B0, B5
+ mov B5, B2
+ mov B2, B7
+ mov B7, B4
+ mov B4, B1
+ mov B1, B6
+ mov B6, B3
+ mov B3, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 6 7 0 1 2 3 4 5
+byte_rotr_6: /* 11 words */
+ mov r0, B0 /* even positions form one cycle ... */
+ mov B0, B6
+ mov B6, B4
+ mov B4, B2
+ mov B2, r0
+ mov r0, B1 /* ... odd positions the other */
+ mov B1, B7
+ mov B7, B5
+ mov B5, B3
+ mov B3, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 7 0 1 2 3 4 5 6
+byte_rotr_7: /* 10 words */
+ mov r0, B7
+ mov B7, B6
+ mov B6, B5
+ mov B5, B4
+ mov B4, B3
+ mov B3, B2
+ mov B2, B1
+ mov B1, B0
+ mov B0, r0
+ rjmp post_byterot
+/* Bit-rotation loops; their labels (bit_rotl, then bit_rotr) are not visible
+ * in this excerpt but are the targets of the rjmp at the end of each loop.
+ * Each pass rotates the 64-bit value in B0..B7 by one bit and decrements
+ * vROT; when vROT reaches zero control goes to post_bitrot.
+ */
+/* presumably bit_rotl: rotate left by vROT bits */
+ tst vROT
+ brne 1f
+ rjmp post_bitrot
+1: mov r0, B7
+ rol r0 /* C = top bit of B7, so it wraps around into bit 0 of B0 */
+ rol B0
+ rol B1
+ rol B2
+ rol B3
+ rol B4
+ rol B5
+ rol B6
+ rol B7
+ dec vROT
+ rjmp bit_rotl
+/* presumably bit_rotr: rotate right by vROT bits */
+ tst vROT
+ brne 1f
+ rjmp post_bitrot
+1: mov r0, B0
+ ror r0 /* C = bottom bit of B0, so it wraps around into bit 7 of B7 */
+ ror B7
+ ror B6
+ ror B5
+ ror B4
+ ror B3
+ ror B2
+ ror B1
+ ror B0
+ dec vROT
+ rjmp bit_rotr
--- /dev/null
+/* threefish_mix_4c.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+#define B0 (((uint64_t*)data)[0])
+#define B1 (((uint64_t*)data)[1])
+void mix(void* data, uint8_t rot){
+ uint64_t x;
+ x = B1;
+ B0 += x;
+ B1 = ((x<<rot)|(x>>(64-rot))) ^ B0;
+A0 = 10
+A1 = 11
+A2 = 12
+A3 = 13
+A4 = 14
+A5 = 15
+A6 = 16
+A7 = 17
+B0 = 18
+B1 = 19
+B2 = 20
+B3 = 21
+B4 = 22
+B5 = 23
+B6 = 24
+B7 = 25
+vROT = 27
+ * param data: r24:r25
+ * param rot: r22
+ */
+/* threefish_mix: Threefish MIX step on the two 64-bit words at `data`:
+ *   w0 += w1;  w1 = rotl(w1, rot) ^ w0
+ * In: r24:r25 = data pointer, r22 = rot as a plain bit count.
+ * Saves and restores r28, r29 and r10..r17 around the work; r0, r18..r27,
+ * r30 and r31 are overwritten here or in the rotation helpers.
+ * The rotation is split into (rot + 3) >> 3 whole bytes to the left plus a
+ * correction of at most 4 bits left or 3 bits right taken from bit_rot_lut.
+ * The entry label and the post_byterot / post_bitrot labels are not visible
+ * in this excerpt; their presumed positions are marked below.
+ */
+.global threefish_mix
+ push r28
+ push r29
+ push_range 10, 17
+ movw r28, r24 /* Y = data; r24/r25 are reused as B6/B7 below */
+ mov vROT,r22
+ ldd A0, Y+ 0 /* A0..A7 = first word, lowest address in A0 */
+ ldd A1, Y+ 1
+ ldd A2, Y+ 2
+ ldd A3, Y+ 3
+ ldd A4, Y+ 4
+ ldd A5, Y+ 5
+ ldd A6, Y+ 6
+ ldd A7, Y+ 7
+ ldd B0, Y+ 8 /* B0..B7 = second word */
+ ldd B1, Y+ 9
+ ldd B2, Y+10
+ ldd B3, Y+11
+ ldd B4, Y+12
+ ldd B5, Y+13
+ ldd B6, Y+14
+ ldd B7, Y+15
+ add A0, B0 /* A += B, 64-bit add with carry chain */
+ adc A1, B1
+ adc A2, B2
+ adc A3, B3
+ adc A4, B4
+ adc A5, B5
+ adc A6, B6
+ adc A7, B7
+ mov r26, vROT
+ adiw r26, 3 /* word add on r27:r26; r27 is vROT, so this assumes rot + 3 does not overflow the low byte */
+ lsr r26 /* r26 = (rot + 3) >> 3 = byte count rounded to nearest */
+ lsr r26
+ lsr r26
+; andi r26, 0x07
+ ldi r30, pm_lo8(byte_rot_jmptable)
+ ldi r31, pm_hi8(byte_rot_jmptable)
+ add r30, r26
+ adc r31, r1 /* relies on r1 == 0 */
+ ijmp /* each table entry ends up at post_byterot */
+/* presumably post_byterot: look up the remaining bit rotation for rot & 7 */
+ ldi r30, lo8(bit_rot_lut)
+ ldi r31, hi8(bit_rot_lut)
+ andi vROT, 0x07
+ add r30, vROT
+ adc r31, r1
+ lpm r27, Z /* r27 (= vROT) now holds the table entry */
+ bst r27, 7 /* T = direction flag (set = rotate right) */
+ andi r27, 0x07 /* keep only the bit count; andi leaves T alone */
+ brts 1f
+ rjmp bit_rotl
+1: rjmp bit_rotr
+/* presumably post_bitrot: B now holds the rotated word */
+ eor B0, A0 /* B ^= A */
+ eor B1, A1
+ eor B2, A2
+ eor B3, A3
+ eor B4, A4
+ eor B5, A5
+ eor B6, A6
+ eor B7, A7
+ std Y+ 0, A0 /* write both words back to data */
+ std Y+ 1, A1
+ std Y+ 2, A2
+ std Y+ 3, A3
+ std Y+ 4, A4
+ std Y+ 5, A5
+ std Y+ 6, A6
+ std Y+ 7, A7
+ std Y+ 8, B0
+ std Y+ 9, B1
+ std Y+10, B2
+ std Y+11, B3
+ std Y+12, B4
+ std Y+13, B5
+ std Y+14, B6
+ std Y+15, B7
+ pop_range 10, 17
+ pop r29
+ pop r28
+ ret
+/* Residual bit rotation, indexed by (rot & 7); the label (presumably
+ * bit_rot_lut, which the code above reads with lpm) is not visible in this
+ * excerpt. Bit 7 set means rotate right, bits 0..2 give the bit count.
+ * Indices 5..7 rotate right by 3..1 because the byte count was rounded up.
+ */
+ .byte 0x00
+ .byte 0x01
+ .byte 0x02
+ .byte 0x03
+ .byte 0x04
+ .byte 0x83
+ .byte 0x82
+ .byte 0x81
+/* Dispatch table reached via ijmp (its label, presumably byte_rot_jmptable,
+ * is not visible in this excerpt). Entry k performs a left rotation by k
+ * bytes, implemented as a right rotation by 8-k bytes; entries 0 and 8 are
+ * no-ops. The index here is (rot + 3) >> 3, so entry 8 is selected when
+ * rot + 3 reaches 64.
+ */
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+ rjmp byte_rotr_7
+ rjmp byte_rotr_6
+ rjmp byte_rotr_5
+ rjmp byte_rotr_4
+ rjmp byte_rotr_3
+ rjmp byte_rotr_2
+ rjmp byte_rotr_1
+ rjmp post_byterot;ret; rjmp byte_rotr_0
+/* byte_rotr_N: rotate the 64-bit value in B0..B7 right by N whole bytes,
+ * i.e. new B[i] = old B[(i + N) mod 8], using r0 as the only temporary.
+ * The two-line comment before each routine shows the register indices before
+ * (top row) and the source index that lands in each position (bottom row).
+ * Every routine ends by jumping to post_byterot.
+ */
+; 0 1 2 3 4 5 6 7
+; 1 2 3 4 5 6 7 0
+byte_rotr_1: /* 10 words */
+ mov r0, B0
+ mov B0, B1
+ mov B1, B2
+ mov B2, B3
+ mov B3, B4
+ mov B4, B5
+ mov B5, B6
+ mov B6, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 2 3 4 5 6 7 0 1
+byte_rotr_2: /* 11 words */
+ mov r0, B0 /* even positions form one cycle ... */
+ mov B0, B2
+ mov B2, B4
+ mov B4, B6
+ mov B6, r0
+ mov r0, B1 /* ... odd positions the other */
+ mov B1, B3
+ mov B3, B5
+ mov B5, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 3 4 5 6 7 0 1 2
+byte_rotr_3: /* 10 words */
+ mov r0, B0 /* single 8-element cycle, walked in steps of 3 */
+ mov B0, B3
+ mov B3, B6
+ mov B6, B1
+ mov B1, B4
+ mov B4, B7
+ mov B7, B2
+ mov B2, B5
+ mov B5, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 4 5 6 7 0 1 2 3
+byte_rotr_4: /* 13 words */
+ mov r0, B0 /* four independent swaps B[i] <-> B[i+4] */
+ mov B0, B4
+ mov B4, r0
+ mov r0, B1
+ mov B1, B5
+ mov B5, r0
+ mov r0, B2
+ mov B2, B6
+ mov B6, r0
+ mov r0, B3
+ mov B3, B7
+ mov B7, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 5 6 7 0 1 2 3 4
+byte_rotr_5: /* 10 words */
+ mov r0, B0 /* single 8-element cycle, walked in steps of 5 */
+ mov B0, B5
+ mov B5, B2
+ mov B2, B7
+ mov B7, B4
+ mov B4, B1
+ mov B1, B6
+ mov B6, B3
+ mov B3, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 6 7 0 1 2 3 4 5
+byte_rotr_6: /* 11 words */
+ mov r0, B0 /* even positions form one cycle ... */
+ mov B0, B6
+ mov B6, B4
+ mov B4, B2
+ mov B2, r0
+ mov r0, B1 /* ... odd positions the other */
+ mov B1, B7
+ mov B7, B5
+ mov B5, B3
+ mov B3, r0
+ rjmp post_byterot
+; 0 1 2 3 4 5 6 7
+; 7 0 1 2 3 4 5 6
+byte_rotr_7: /* 10 words */
+ mov r0, B7
+ mov B7, B6
+ mov B6, B5
+ mov B5, B4
+ mov B4, B3
+ mov B3, B2
+ mov B2, B1
+ mov B1, B0
+ mov B0, r0
+ rjmp post_byterot
+/* Bit-rotation loops; their labels (bit_rotl, then bit_rotr) are not visible
+ * in this excerpt but are the targets of the rjmp at the end of each loop.
+ * Each pass rotates the 64-bit value in B0..B7 by one bit and decrements the
+ * count in r27; when it reaches zero control goes to post_bitrot.
+ */
+/* presumably bit_rotl: rotate left by r27 bits */
+ tst r27
+ brne 1f
+ rjmp post_bitrot
+1: mov r0, B7
+ rol r0 /* C = top bit of B7, so it wraps around into bit 0 of B0 */
+ rol B0
+ rol B1
+ rol B2
+ rol B3
+ rol B4
+ rol B5
+ rol B6
+ rol B7
+ dec r27
+ rjmp bit_rotl
+/* presumably bit_rotr: rotate right by r27 bits */
+ tst r27
+ brne 1f
+ rjmp post_bitrot
+1: mov r0, B0
+ ror r0 /* C = bottom bit of B0, so it wraps around into bit 7 of B7 */
+ ror B7
+ ror B6
+ ror B5
+ ror B4
+ ror B3
+ ror B2
+ ror B1
+ ror B0
+ dec r27
+ rjmp bit_rotr
--- /dev/null
+/* threefish_mix_c.c */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ *
+ *
+ *
+ */
+#include <stdint.h>
+#define X0 (((uint64_t*)data)[0])
+#define X1 (((uint64_t*)data)[1])
+/**
+ * Threefish MIX step, C reference version.
+ * Treats data as two consecutive uint64_t words (X0, X1), adds X1 into X0
+ * and replaces X1 with (old X1 rotated left by rot bits) XOR (new X0).
+ * \param data  pointer to the two 64-bit words, modified in place
+ * \param rot   rotation amount in bits
+ * NOTE(review): the shifts are only defined for 1 <= rot <= 63; rot == 0
+ * would shift a 64-bit value by 64. Confirm no caller passes 0.
+ */
+void threefish_mix(void* data, uint8_t rot){
+ uint64_t x;
+ x = X1; /* keep the unmodified second word for the rotation */
+ X0 += x;
+ X1 = ((x<<rot)|(x>>(64-rot))) ^ X0; /* rotl(x, rot) ^ updated X0 */
--- /dev/null
+/* ubi256_asm.S */
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ GNU General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * \author Daniel Otte
+ * \email daniel.otte@rub.de
+ * \date 2009-03-16
+ * \license GPLv3 or later
+ */
+#include "avr-asm-macros.S"
+void ubi256_init(ubi256_ctx_t* ctx, const void* g, uint8_t type){
+ memset(ctx->tweak, 0, 15);
+ ctx->tweak[15] = 0x40+type;
+ memcpy(ctx->g, g, 32);
+ * param ctx: r24:r25
+ * param g: r22:r23
+ * param type: r20
+ */
+/* ubi256_init: zero the first 15 bytes at ctx, store (type | 0x40) as byte 15
+ * and copy 32 bytes from g to ctx + 16.
+ * In: r24:r25 = ctx, r22:r23 = g, r20 = type.
+ * Overwrites r20, r21, X and Z. The entry label is not visible in this excerpt.
+ * NOTE(review): the C reference uses 0x40 + type while this uses ori; the two
+ * agree only while bit 6 of type is clear.
+ */
+.global ubi256_init
+ movw r26, r24 /* X = ctx */
+ ldi r21, 15
+1: st X+, r1 /* relies on r1 == 0 */
+ dec r21
+ brne 1b
+ ori r20, 0x40
+ st X+, r20 /* byte 15 */
+ ldi r21, 32
+ movw r30, r22 /* Z = g */
+2: ld r20, Z+
+ st X+, r20 /* X has reached ctx + 16 by now */
+ dec r21
+ brne 2b
+ ret
+void ubi256_ctx2hash(void* dest, const ubi256_ctx_t* ctx){
+ memcpy(dest, ctx->g, UBI256_BLOCKSIZE_B);
+ * param dest: r24:r25
+ * param ctx: r22:r23
+ */
+/* ubi256_ctx2hash: copy the 32 bytes starting at ctx + 16 to dest.
+ * In: r24:r25 = dest, r22:r23 = ctx.
+ * Overwrites r22, r23, X and Z. The entry label is not visible in this excerpt.
+ */
+.global ubi256_ctx2hash
+ movw r26, r24 /* X = dest */
+ movw r30, r22 /* Z = ctx */
+ adiw r30, 16 /* skip the first 16 bytes of the context */
+ ldi r22, 32 /* r22/r23 are free now that ctx lives in Z */
+1: ld r23, Z+
+ st X+, r23
+ dec r22
+ brne 1b
+ ret
+void ubi256_nextBlock(ubi256_ctx_t* ctx, const void* block){
+ threefish256_ctx_t tfctx;
+ ((uint64_t*)(ctx->tweak))[0] += UBI256_BLOCKSIZE_B;
+ threefish256_init(ctx->g, ctx->tweak, &tfctx);
+ memcpy(ctx->g, block, UBI256_BLOCKSIZE_B);
+ threefish256_enc(ctx->g, &tfctx);
+ memxor(ctx->g, block, UBI256_BLOCKSIZE_B);
+ ctx->tweak[15] &= (uint8_t)~0x40;
+ * param ctx: r24:r25
+ * param block: r22:r23
+ */
+/* ubi256_nextBlock: process one full 32-byte block.
+ * In: r24:r25 = ctx, r22:r23 = block.
+ * Steps: add 32 to the tweak counter, run threefish256_init with ctx + 16 as
+ * key and ctx as tweak, copy the block over ctx + 16, encrypt it in place,
+ * XOR the block back in and clear bit 0x40 of byte 15 of ctx.
+ * A 64-byte Threefish context is kept on the stack for the duration.
+ * r2..r7 are saved and restored. The entry label is not visible in this excerpt.
+ */
+CTX0 = 2
+CTX1 = 3
+BLOCK0 = 4
+BLOCK1 = 5
+TFCTX0 = 6
+TFCTX1 = 7
+.global ubi256_nextBlock
+ stack_alloc_large 64 /* macro from avr-asm-macros.S; presumably leaves Z at the new stack pointer - confirm */
+ push_range 2, 7
+ adiw r30, 1 /* Z points to tfctx */
+ movw TFCTX0, r30
+ movw CTX0, r24
+ movw BLOCK0, r22
+ movw r26, r24
+/* add BLOCKSIZE_B (32) to tweak */
+/* The carry is propagated through 1 + 11 = 12 bytes, whereas the C reference
+ * above adds into a 64-bit word only. */
+ ldi r25, 32
+ ld r24, X
+ add r24, r25
+ st X+, r24
+ ldi r25, 11
+1: ld r24, X
+ adc r24, r1 /* relies on r1 == 0; ld/st/dec leave C untouched */
+ st X+, r24
+ dec r25
+ brne 1b
+/* call threefish256_init */
+ movw r24, CTX0
+ adiw r24, 16
+ movw r22, CTX0
+ movw CTX0, r24 /* CTX points to ctx->g */
+ movw r20, TFCTX0
+ rcall threefish256_init
+ /* copy block to ctx->g */
+ movw r26, CTX0
+ movw r30, BLOCK0
+ ldi r25, 32
+1: ld r24, Z+
+ st X+, r24
+ dec r25
+ brne 1b
+/* call threefish256_enc */
+ movw r24, CTX0
+ movw r22, TFCTX0
+ rcall threefish256_enc
+/* xor block into ctx->g */
+ movw r26, BLOCK0
+ movw r30, CTX0
+ ldi r25, 32
+1: ld r24, X+
+ ld r23, Z
+ eor r23, r24
+ st Z+, r23
+ dec r25
+ brne 1b
+/* clear 'first' bit in tweak */
+ sbiw r30, 33 /* Z ended at g + 32; g - 1 is byte 15 of ctx */
+ ld r24, Z
+ andi r24, ~0x40
+ st Z, r24
+ pop_range 2, 7
+ stack_free_large 64
+ ret
+void ubi256_lastBlock(ubi256_ctx_t* ctx, const void* block, uint16_t length_b){
+ threefish256_ctx_t tfctx;
+ while(length_b>UBI256_BLOCKSIZE){
+ ubi256_nextBlock(ctx, block);
+ block = (uint8_t*)block + UBI256_BLOCKSIZE_B;
+ length_b -= UBI256_BLOCKSIZE;
+ }
+ ctx->tweak[15] |= 0x80;
+ ((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8;
+ if(length_b & 0x07){
+ ctx->tweak[14] |= 0x80;
+ }
+ threefish256_init(ctx->g, ctx->tweak, &tfctx);
+ memset(ctx->g, 0, UBI256_BLOCKSIZE_B);
+ memcpy(ctx->g, block, (length_b+7)/8);
+ if(length_b & 0x07){
+ ctx->g[((length_b+7)/8)-1] |= 0x80>>(length_b&7);
+ ctx->g[((length_b+7)/8)-1] &= ~((0x80>>(length_b&7))-1);
+ }
+ threefish256_enc(ctx->g, &tfctx);
+ memxor(ctx->g, block, (length_b+7)/8);
+ if(length_b & 0x07){
+ ctx->g[((length_b+7)/8)-1] ^= 0x80>>(length_b&7);
+ }
+ * param ctx: r24:r25
+ * param block: r22:r23
+ * param length_b: r20:r21
+ */
+/* ubi256_lastBlock: process the final, possibly partial block.
+ * In: r24:r25 = ctx, r22:r23 = block, r20:r21 = length_b (length in bits).
+ * While more than 256 bits remain, full blocks are handed to
+ * ubi256_nextBlock. The rest is padded, the tweak is updated and flagged,
+ * and the block is encrypted and XORed back like a regular block.
+ * Register roles:
+ *   MASK_B  pad-bit mask 0x80 >> (length_b & 7), or 0 for whole-byte input
+ *   LEN_B   number of message bytes in the last block, (length_b + 7) / 8
+ * r8..r17 are saved and restored; 64 bytes of stack hold the Threefish
+ * context. The entry label is not visible in this excerpt.
+ */
+MASK_B = 8
+LEN_B = 9
+TFCTX0 = 10
+TFCTX1 = 11
+CTX0 = 12
+CTX1 = 13
+BLOCK0 = 14
+BLOCK1 = 15
+LENGTH0 = 16
+LENGTH1 = 17
+.global ubi256_lastBlock
+/* run nextBlock for preceding blocks*/
+ push_range 8, 17
+ movw CTX0, r24
+ movw BLOCK0, r22
+ movw LENGTH0, r20
+1: cpi LENGTH1, 2 /* high byte >= 2 means at least 512 bits are left */
+ brlo 2f
+ movw r24, CTX0
+ movw r22, BLOCK0
+ rcall ubi256_nextBlock
+ ldi r25, 32
+ add BLOCK0, r25 /* advance the block pointer by 32 bytes */
+ adc BLOCK1, r1
+ dec LENGTH1 /* length_b -= 256 */
+ rjmp 1b
+2: tst LENGTH1 /* one more full block only if 256 < length_b < 512 */
+ breq 3f
+ tst LENGTH0
+ breq 3f /* exactly 256 bits: that block is the last block */
+ movw r24, CTX0
+ movw r22, BLOCK0
+ rcall ubi256_nextBlock
+ ldi r25, 32
+ add BLOCK0, r25
+ adc BLOCK1, r1
+ dec LENGTH1
+3: /* now the real fun */
+ stack_alloc_large 64 /* macro from avr-asm-macros.S; presumably leaves Z at the new stack pointer - confirm */
+ adiw r30, 1
+ movw TFCTX0, r30
+ /* calculate LEN_B */
+ movw r24, LENGTH0
+ adiw r24, 7
+ lsr r25 /* 16-bit shift for the first step: the sum can reach 263 */
+ ror r24
+ lsr r24
+ lsr r24
+ mov LEN_B, r24
+ /* add length to tweak */
+ movw r30, CTX0
+ ld r24, Z
+ add r24, LEN_B
+ st Z+, r24
+ ldi r25, 11
+1: ld r24, Z
+ adc r24, r1 /* relies on r1 == 0; ld/st/dec leave C untouched */
+ st Z+, r24
+ dec r25
+ brne 1b
+ /* set 'final' bit*/
+ movw r30, CTX0
+ ldd r24, Z+15
+ ori r24, 0x80
+ std Z+15, r24
+ /* store in T if we do bit processing and set 'BitPad' bit*/
+ /* (as written, the state is kept in MASK_B rather than in the T flag) */
+ clr MASK_B
+ mov r24, LENGTH0
+ andi r24, 0x07
+ tst r24
+ breq 4f /* whole number of bytes: no padding bit needed */
+ ldd r25, Z+14
+ ori r25, 0x80
+ std Z+14, r25
+ ldi r25, 0x80
+ mov MASK_B, r25
+1: lsr MASK_B /* MASK_B = 0x80 >> (length_b & 7) */
+ dec r24
+ brne 1b
+4: /* call threefish256_init*/
+ movw r24, CTX0
+ adiw r24, 16
+ movw r22, CTX0
+ movw CTX0, r24 /* CTX points at ctx->g */
+ movw r20, TFCTX0
+ rcall threefish256_init
+ /* copy block to ctx->g */
+ movw r26, BLOCK0
+ movw r30, CTX0
+ mov r24, LEN_B
+ ldi r25, 32
+ sub r25, LEN_B /* r25 = number of trailing zero bytes */
+ tst r24
+1: breq 2f /* tests Z from tst on entry and from dec afterwards */
+ ld r22, X+
+ st Z+, r22
+ dec r24
+ rjmp 1b
+2: tst MASK_B
+ breq 29f
+ or r22, MASK_B /* r22 still holds the last copied byte: set the pad bit */
+ /* NOTE(review): the C reference also clears the bits below the pad bit;
+  * no such instruction is visible here - confirm inputs have them zero. */
+ st -Z, r22
+ adiw r30, 1
+29: tst r25
+3: breq 4f
+ st Z+, r1 /* zero-fill the rest of the block */
+ dec r25
+ rjmp 3b
+4: /* call threefish256_enc */
+ movw r24, CTX0
+ movw r22, TFCTX0
+ rcall threefish256_enc
+ /* xor block into ctx->g */
+ movw r30, CTX0
+ movw r26, BLOCK0
+ tst LEN_B
+5: breq 6f
+ ld r22, X+
+ ld r23, Z
+ eor r23, r22
+ st Z+, r23
+ dec LEN_B
+ rjmp 5b
+6: tst MASK_B
+ breq 7f
+ eor r23, MASK_B /* r23 still holds the last byte written: fold in the pad bit */
+ st -Z, r23
+7: stack_free_large 64
+ pop_range 8, 17
+ ret