From: bg Date: Mon, 27 Apr 2009 17:47:36 +0000 (+0000) Subject: more shabal X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=commitdiff_plain;h=600e82c981df8a6e446c6469e7e6be627ec027de more shabal --- diff --git a/host/shavs_test.rb b/host/shavs_test.rb index 74d02c8..225c381 100644 --- a/host/shavs_test.rb +++ b/host/shavs_test.rb @@ -57,13 +57,14 @@ def send_md(md_string) $sp.print(md_string[i].chr) # print("DBG s: "+ md_string[i].chr) if $debug if(i%20==19) - sleep(0.1) +# sleep(0.1) end end end def run_test(filename) errors = 0 + line=1 if not File.exist?(filename) puts("ERROR file "+filename+" does not exist!") end @@ -91,7 +92,8 @@ def run_test(filename) b = (/[\s]*MD[\s]*=[\s]*([0-9a-fA-F]*).*/.match(avr_md))[1]; a.upcase! b.upcase! - puts("") if (pos%$linewidth==0 and $linewidth!=0) + printf("\n%4d: ", line) if (pos%$linewidth==0 and $linewidth!=0) + line += 1 if (pos%$linewidth==0 and $linewidth!=0) #putc((a==b)?'*':'!') if(a==b) putc('*') @@ -131,9 +133,9 @@ init_system() for i in (5..(ARGV.size-1)) errors = run_test(ARGV[i]) if errors == 0 - puts("[ok]") + puts("\n[ok]") else - puts("[errors: "+errors.to_s+"]") + puts("\n[errors: "+errors.to_s+"]") end end $sp.print("EXIT\r"); diff --git a/mkfiles/shabal.mk b/mkfiles/shabal.mk index 5034854..bc12041 100644 --- a/mkfiles/shabal.mk +++ b/mkfiles/shabal.mk @@ -1,11 +1,12 @@ # Makefile for Skein -ALGO_NAME := SHABAL_C +ALGO_NAME := SHABAL # comment out the following line for removement of Skein from the build process HASHES += $(ALGO_NAME) -$(ALGO_NAME)_OBJ := shabal.o +$(ALGO_NAME)_OBJ := shabal-asm.o shabal192-asm.o shabal224-asm.o \ + shabal256-asm.o shabal384-asm.o shabal512-asm.o $(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o uart.o hexdigit_tab.o \ dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \ nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o diff --git a/mkfiles/shabal_c.mk b/mkfiles/shabal_c.mk new file mode 100644 index 0000000..9521743 --- /dev/null 
+++ b/mkfiles/shabal_c.mk @@ -0,0 +1,14 @@ +# Makefile for Shabal (C implementation) +ALGO_NAME := SHABAL_C + +# comment out the following line to remove Shabal (C implementation) from the build process +HASHES += $(ALGO_NAME) + + +$(ALGO_NAME)_OBJ := shabal.o shabal192.o shabal224.o shabal256.o shabal384.o shabal512.o +$(ALGO_NAME)_TEST_BIN := main-shabal-test.o debug.o uart.o hexdigit_tab.o \ + dbz_strings.o nessie_common.o cli.o string-extras.o performance_test.o \ + nessie_hash_test.o hfal-basic.o hfal_shabal.o shavs.o +$(ALGO_NAME)_NESSIE_TEST := test nessie +$(ALGO_NAME)_PERFORMANCE_TEST := performance + diff --git a/shabal-asm.S b/shabal-asm.S new file mode 100644 index 0000000..31879b3 --- /dev/null +++ b/shabal-asm.S @@ -0,0 +1,739 @@ +/* shabal-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \file shabal-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ +/* +void shabal_p(shabal_ctx_t* ctx, const void* m){ + uint8_t i,j; + for(i=0;i<16;++i){ + ctx->b[i] = ROTL32(ctx->b[i],17); + } + for(j=0;j<SHABAL_P;++j){ + for(i=0;i<16;++i){ + ctx->a[(i+16*j)%SHABAL_R] = + shabal_u(ctx->a[(i+16*j)%SHABAL_R] + ^ shabal_v(ROTL32(ctx->a[(i+16*j+SHABAL_R-1)%SHABAL_R],15)) + ^ ctx->c[(8-i+16)%16]) + ^ ctx->b[(i+SHABAL_O1)%16] + ^ ((ctx->b[(i+SHABAL_O2)%16]) & ~(ctx->b[(i+SHABAL_O3)%16])) + ^ ((uint32_t*)m)[i]; + ctx->b[i] = ROTL32(ctx->b[i], 1) ^ ~(ctx->a[(i+16*j)%SHABAL_R]); + } + } + + for(j=0;j<36;++j){ + ctx->a[j%SHABAL_R] += ctx->c[(j+3)%16]; + } +} +*/ +MB0 = 2 +MB1 = 3 +AB0 = 4 +AB1 = 5 +BB0 = 6 +BB1 = 7 +CB0 = 8 +CB1 = 9 +AL0 = 10 +AL1 = 11 +AL2 = 12 +AL3 = 13 +A0 = 14 +A1 = 15 +A2 = 16 +A3 = 17 +B0 = 18 +B1 = 19 +B2 = 20 +B3 = 21 +I = 22 +J = 23 +T0 = 26 +T1 = 27 +T2 = 28 +T3 = 29 +/* + * param ctx: r24:r25 + * param m: r22:r23 + */ +; .global shabal_p +shabal_p: + push_range 2, 17 + push r28 + push r29 + movw MB0, r22 + movw r30, r24 + adiw r30, 8 + ld BB0, Z+ + ld BB1, Z+ + ld CB0, Z+ + ld CB1, Z+ + movw AB0, r30 + movw r30, BB0 + adiw r30, 16*4-1 + adiw r30, 1 + ldi r24, 16 +1: + ld A3, -Z + ld A2, -Z + ld A1, -Z + ld A0, -Z + mov r0, A3 + rol r0 + rol A0 + rol A1 + rol A2 + rol A3 + std Z+0, A2 + std Z+1, A3 + std Z+2, A0 + std Z+3, A1 + dec r24 + brne 1b + movw B0, A2 + movw B2, A0 + /* load ctx->a[(i+16*j-1)%12]*/ + movw r26, AB0 + adiw r26, 4*11 + ld AL0, X+ + ld AL1, X+ + ld AL2, X+ + ld AL3, X+ + clr I + clr J +1: + /* ROTL32(AL, 15)*/ + movw T0, AL2 + movw T2, AL0 + mov r0, T0 + ror r0 + ror T3 + ror T2 + ror T1 + ror T0 + movw AL0, T0 + movw AL2, T2 + /* apply V to AL */ + movw A0, AL0 + movw A2, AL2 + lsl A0 + rol A1 + rol A2 + rol A3 + lsl A0 + rol A1 + rol A2 + rol A3 + add A0, AL0 + adc A1, AL1 + adc 
A2, AL2 + adc A3, AL3 + /* xor in ctx->c[(8-i+16)%16] */ + ldi T0, 24 + sub T0, I + andi T0, 0x0f + lsl T0 + lsl T0 + movw r30, CB0 + add r30, T0 + adc r31, r1 + ld r0, Z+ + eor A0, r0 + ld r0, Z+ + eor A1, r0 + ld r0, Z+ + eor A2, r0 + ld r0, Z+ + eor A3, r0 + /* xor in ctx->a[(i+16*j)%12] */ + mov T0, J + swap T0 /* *=16 */ + add T0, I + ldi r30, lo8(mod12table) + ldi r31, hi8(mod12table) + add r30, T0 + adc r31, r1 + lpm T0, Z + movw r30, AB0 + add r30, T0 + adc r31, r1 + movw T2, r30 + ld r0, Z+ + eor A0, r0 + ld r0, Z+ + eor A1, r0 + ld r0, Z+ + eor A2, r0 + ld r0, Z+ + eor A3, r0 + /* AL = 3*A */ + movw AL0, A0 + movw AL2, A2 + lsl AL0 + rol AL1 + rol AL2 + rol AL3 + add AL0, A0 + adc AL1, A1 + adc AL2, A2 + adc AL3, A3 + /* xor in ctx->b[(i+13)%16] */ + ldi T0, 13 + add T0, I + andi T0, 0x0f + lsl T0 + lsl T0 + movw r30, BB0 + add r30, T0 + adc r31, r1 + ld r0, Z+ + eor AL0, r0 + ld r0, Z+ + eor AL1, r0 + ld r0, Z+ + eor AL2, r0 + ld r0, Z+ + eor AL3, r0 + /* load ctx->b[(i+9)%16] into A */ + ldi T0, 9 + add T0, I + andi T0, 0x0f + lsl T0 + lsl T0 + movw r30, BB0 + add r30, T0 + adc r31, r1 + ld A0, Z+ + ld A1, Z+ + ld A2, Z+ + ld A3, Z+ + /* and in ~(ctx->b[(i+6)%16]) */ + ldi T0, 6 + add T0, I + andi T0, 0x0f + lsl T0 + lsl T0 + movw r30, BB0 + add r30, T0 + adc r31, r1 + ld r0, Z+ + com r0 + and A0, r0 + ld r0, Z+ + com r0 + and A1, r0 + ld r0, Z+ + com r0 + and A2, r0 + ld r0, Z+ + com r0 + and A3, r0 + /* xor A into AL */ + eor AL0, A0 + eor AL1, A1 + eor AL2, A2 + eor AL3, A3 + /* xor m[i] into AL */ + mov T0, I + lsl T0 + lsl T0 + movw r30, MB0 + add r30, T0 + adc r31, r1 + ld r0, Z+ + eor AL0, r0 + ld r0, Z+ + eor AL1, r0 + ld r0, Z+ + eor AL2, r0 + ld r0, Z+ + eor AL3, r0 + /* A (AL) is done, now store it */ + movw r30, T2 + st Z+, AL0 + st Z+, AL1 + st Z+, AL2 + st Z+, AL3 + /* process ctx->b[i] */ + /* ROTL32(b, 1)*/ + mov r0, B3 + rol r0 + rol B0 + rol B1 + rol B2 + rol B3 + /* xor in ~(ctx->a[(i+16*j)%SHABAL_R]) */ + movw A0, AL0 + movw A2, AL2 
+ com A0 + com A1 + com A2 + com A3 + eor B0, A0 + eor B1, A1 + eor B2, A2 + eor B3, A3 + /* store B */ + movw r30, BB0 + mov T0, I + lsl T0 + lsl T0 + add r30, T0 + adc r31, r1 + st Z+, B0 + st Z+, B1 + st Z+, B2 + st Z+, B3 + inc I + cpi I, 16 + brne local_reload + inc J + cpi J, 3 + brne global_reload + rjmp addition +global_reload: + clr I +local_reload: + mov T0, I + lsl T0 + lsl T0 + movw r30, BB0 + add r30, T0 + adc r31, r1 + ld B0, Z+ + ld B1, Z+ + ld B2, Z+ + ld B3, Z+ + + rjmp 1b +addition: + clr J + movw r30, AB0 + movw r26, CB0 + adiw r26, 3*4 +1: + /* J = 0..11 */ + ld AL0, X+ + ld A0, Z + add A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + inc J + cpi J, 12 + brne 1b + /* J = 12 */ + movw r30, AB0 + ld AL0, X+ + ld A0, Z + add A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + inc J + /* J= 13..23*/ + movw r26, CB0 +1: + ld AL0, X+ + ld A0, Z + add A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + inc J + cpi J, 24 + brne 1b + /* J= 24..28*/ + movw r30, AB0 +1: + ld AL0, X+ + ld A0, Z + add A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + inc J + cpi J, 29 + brne 1b + + /* J= 29..35*/ + movw r26, CB0 +1: + ld AL0, X+ + ld A0, Z + add A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + ld AL0, X+ + ld A0, Z + adc A0, AL0 + st Z+, A0 + inc J + cpi J, 36 + brne 1b +exit: + pop r29 + pop r28 + pop_range 2, 17 + ret + +mod12table: + .byte 0, 4, 8, 12, 16, 20, 24, 28 + .byte 32, 36, 40, 44, 0, 4, 8, 12 + 
.byte 16, 20, 24, 28, 32, 36, 40, 44 + .byte 0, 4, 8, 12, 16, 20, 24, 28 + .byte 32, 36, 40, 44, 0, 4, 8, 12 + .byte 16, 20, 24, 28, 32, 36, 40, 44 + +/******************************************************************************/ +/* +void shabal_nextBlock(shabal_ctx_t* ctx, const void* block){ + uint8_t i; + uint32_t* t; + for(i=0;i<16;++i){ + ctx->b[i] += ((uint32_t*)block)[i]; + } + ctx->a[0] ^= ctx->w.w32[0]; + ctx->a[1] ^= ctx->w.w32[1]; + shabal_p(ctx, block); + for(i=0;i<16;++i){ + ctx->c[i] -= ((uint32_t*)block)[i]; + } + ctx->w.w64++; + t = ctx->c; + ctx->c = ctx->b; + ctx->b = t; +} +*/ +/* + * param ctx: r24:r25 + * param block: r22:r23 + */ +MB0 = 14 +MB1 = 15 +CTX0 = 16 +CTX1 = 17 +.global shabal_nextBlock +shabal_nextBlock: + push_range 14, 17 + movw CTX0, r24 + movw MB0, r22 + /* xor W into A and increment W */ + movw r30, CTX0 + ldi r19, 8 + sec +1: + ld r20, Z + ldd r21, Z+(8+4) + eor r21, r20 + std Z+(8+4), r21 + adc r20, r1 + st Z+, r20 + dec r19 + brne 1b + /* add block to ctx->b */ + ld r26, Z+ + ld r27, Z + movw r30, MB0 + ldi r19, 16 +1: + ld r0, X + ld r18, Z+ + add r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + adc r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + adc r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + adc r0, r18 + st X+, r0 + dec r19 + brne 1b + /* call shbal_p */ + rcall shabal_p + /* sub block from ctx->c */ + movw r30, CTX0 + adiw r30, 8+2 + ld r26, Z+ + ld r27, Z + movw r30, MB0 + ldi r19, 16 +1: + ld r0, X + ld r18, Z+ + sub r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + sbc r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + sbc r0, r18 + st X+, r0 + ld r0, X + ld r18, Z+ + sbc r0, r18 + st X+, r0 + dec r19 + brne 1b + /* xchange ctx->b with ctx->c*/ + movw r30, CTX0 + ldd r22, Z+8 + ldd r23, Z+9 + ldd r24, Z+10 + ldd r25, Z+11 + std Z+10, r22 + std Z+11, r23 + std Z+8, r24 + std Z+9, r25 + pop_range 14, 17 + ret + +/******************************************************************************/ +/* +void 
shabal_lastBlock(shabal_ctx_t* ctx, const void* block, uint16_t length_b){ + uint8_t i,j; + uint32_t* t; + uint8_t buffer[64]; + while(length_b>=SHABAL_BLOCKSIZE){ + shabal_nextBlock(ctx, block); + block = (uint8_t*)block + SHABAL_BLOCKSIZE_B; + length_b -= SHABAL_BLOCKSIZE; + } + memset(buffer, 0, 64); + memcpy(buffer, block, (length_b+7)/8); + buffer[length_b/8] |= 0x80>>(length_b%8); + for(i=0;i<16;++i){ + ctx->b[i] += ((uint32_t*)buffer)[i]; + } + for(j=0; j<4;++j){ + ctx->a[0] ^= ctx->w.w32[0]; + ctx->a[1] ^= ctx->w.w32[1]; + shabal_p(ctx, buffer); + t = ctx->c; + ctx->c = ctx->b; + ctx->b = t; + } +} +*/ +I = 16 +LEN0 = 16 +LEN1 = 17 +CTX0 = 14 +CTX1 = 15 +MB0 = 12 +MB1 = 13 +/* + * param ctx: r24:r25 + * param block: r22:r23 + * param length_b: r20:r21 + */ +.global shabal_lastBlock +shabal_lastBlock: + push_range 12, 17 + movw CTX0, r24 + movw MB0, r22 + movw LEN0, r20 +1: + cpi LEN1, 0x02 + brlo 2f + movw r24, CTX0 + movw r22, MB0 + rcall shabal_nextBlock + subi LEN1, 0x02 + ldi r18, 64 + add MB0, r18 + adc MB1, r1 + rjmp 1b +2: + stack_alloc_large 64 + adiw r30, 1 /* Z points at buffer */ + movw r26, MB0 + /* r24 = LEN/8*/ + movw r24, LEN0 + lsr r25 + ror r24 + lsr r24 + lsr r24 + ldi r25, 64-1 + sub r25, r24 + tst r24 + breq 32f +31: + ld r0, X+ + st Z+, r0 + dec r24 + brne 31b +32: + ldi r18, 0x80 + andi LEN0, 0x07 + breq append_0x80 + ld r0, X+ +33: + lsr r18 + dec LEN0 + brne 33b + or r0, r18 + st Z+, r0 + rjmp append_zeros +append_0x80: + st Z+, r18 +append_zeros: + tst r25 + breq 4f +34: st Z+, r1 + dec r25 + brne 34b +4: + sbiw r30, 63 + sbiw r30, 1 + movw MB0, r30 + movw r26, CTX0 + adiw r26, 8 + ld r24, X+ + ld r25, X + movw r26, r24 + ldi r18, 16 +41: + ld r24, X + ld r25, Z+ + add r24, r25 + st X+, r24 + ld r24, X + ld r25, Z+ + adc r24, r25 + st X+, r24 + ld r24, X + ld r25, Z+ + adc r24, r25 + st X+, r24 + ld r24, X + ld r25, Z+ + adc r24, r25 + st X+, r24 + dec r18 + brne 41b + /* final loop */ + ldi I, 4 +5: + /* xor W into A */ + movw r30, 
CTX0 + ldi r19, 8 +51: + ld r24, Z+ + ldd r25, Z+(8+4-1) + eor r24, r25 + std Z+(8+4-1), r24 + dec r19 + brne 51b + movw r24, CTX0 + movw r22, MB0 + rcall shabal_p + movw r30, CTX0 + ldd r22, Z+8 + ldd r23, Z+9 + ldd r24, Z+10 + ldd r25, Z+11 + std Z+10, r22 + std Z+11, r23 + std Z+8, r24 + std Z+9, r25 + dec I + brne 5b + + stack_free_large 64 + pop_range 12, 17 + ret + + + + + + + + + + + + + + diff --git a/shabal.c b/shabal.c index 1fa0f6d..f18908a 100644 --- a/shabal.c +++ b/shabal.c @@ -27,7 +27,6 @@ #include #include "shabal.h" -#include #include #define SHABAL_O1 13 @@ -53,193 +52,22 @@ void shabal_p(shabal_ctx_t* ctx, const void* m){ for(i=0;i<16;++i){ ctx->b[i] = ROTL32(ctx->b[i],17); } - for(j=0;ja[(i+16*j)%SHABAL_R] = - shabal_u(ctx->a[(i+16*j)%SHABAL_R] - ^ shabal_v(ROTL32(ctx->a[(i+16*j+SHABAL_R-1)%SHABAL_R],15)) + ctx->a[(i+j*16)%SHABAL_R] = + shabal_u(ctx->a[(i+j*16)%SHABAL_R] + ^ shabal_v(ROTL32(ctx->a[(i+j*16+SHABAL_R-1)%SHABAL_R],15)) ^ ctx->c[(8-i+16)%16]) ^ ctx->b[(i+SHABAL_O1)%16] ^ ((ctx->b[(i+SHABAL_O2)%16]) & ~(ctx->b[(i+SHABAL_O3)%16])) ^ ((uint32_t*)m)[i]; - ctx->b[i] = ROTL32(ctx->b[i], 1) ^ ~(ctx->a[(i+16*j)%SHABAL_R]); + ctx->b[i] = ROTL32(ctx->b[i], 1) ^ ~(ctx->a[(i+j*16)%SHABAL_R]); } } for(j=0;j<36;++j){ ctx->a[j%SHABAL_R] += ctx->c[(j+3)%16]; } -/* - for (j=0; j<36; j++){ - ctx->a[(36 - 1 - j)%12] = - ctx->a[(36 - 1 - j) % 12] - + ctx->c[(36 * 16 + 6 - j) % 16]; - - } -*/ -} - -uint32_t shabal192_iv[] PROGMEM = { - /* A */ - 0xFD749ED4, 0xB798E530, 0x33904B6F, 0x46BDA85E, - 0x076934B4, 0x454B4058, 0x77F74527, 0xFB4CF465, - 0x62931DA9, 0xE778C8DB, 0x22B3998E, 0xAC15CFB9, - /* B */ - 0x58BCBAC4, 0xEC47A08E, 0xAEE933B2, 0xDFCBC824, - 0xA7944804, 0xBF65BDB0, 0x5A9D4502, 0x59979AF7, - 0xC5CEA54E, 0x4B6B8150, 0x16E71909, 0x7D632319, - 0x930573A0, 0xF34C63D1, 0xCAF914B4, 0xFDD6612C, - /* C */ - 0x61550878, 0x89EF2B75, 0xA1660C46, 0x7EF3855B, - 0x7297B58C, 0x1BC67793, 0x7FB1C723, 0xB66FC640, - 0x1A48B71C, 0xF0976D17, 0x088CE80A, 0xA454EDF3, 
- 0x1C096BF4, 0xAC76224B, 0x5215781C, 0xCD5D2669 -}; - - -uint32_t shabal224_iv[] PROGMEM = { - /* A */ - 0xA5201467, 0xA9B8D94A, 0xD4CED997, 0x68379D7B, - 0xA7FC73BA, 0xF1A2546B, 0x606782BF, 0xE0BCFD0F, - 0x2F25374E, 0x069A149F, 0x5E2DFF25, 0xFAECF061, - /* B */ - 0xEC9905D8, 0xF21850CF, 0xC0A746C8, 0x21DAD498, - 0x35156EEB, 0x088C97F2, 0x26303E40, 0x8A2D4FB5, - 0xFEEE44B6, 0x8A1E9573, 0x7B81111A, 0xCBC139F0, - 0xA3513861, 0x1D2C362E, 0x918C580E, 0xB58E1B9C, - /* C */ - 0xE4B573A1, 0x4C1A0880, 0x1E907C51, 0x04807EFD, - 0x3AD8CDE5, 0x16B21302, 0x02512C53, 0x2204CB18, - 0x99405F2D, 0xE5B648A1, 0x70AB1D43, 0xA10C25C2, - 0x16F1AC05, 0x38BBEB56, 0x9B01DC60, 0xB1096D83 -}; - -uint32_t shabal256_iv[] PROGMEM = { - /* A */ - 0x52F84552, 0xE54B7999, 0x2D8EE3EC, 0xB9645191, - 0xE0078B86, 0xBB7C44C9, 0xD2B5C1CA, 0xB0D2EB8C, - 0x14CE5A45, 0x22AF50DC, 0xEFFDBC6B, 0xEB21B74A, - /* B */ - 0xB555C6EE, 0x3E710596, 0xA72A652F, 0x9301515F, - 0xDA28C1FA, 0x696FD868, 0x9CB6BF72, 0x0AFE4002, - 0xA6E03615, 0x5138C1D4, 0xBE216306, 0xB38B8890, - 0x3EA8B96B, 0x3299ACE4, 0x30924DD4, 0x55CB34A5, - /* C */ - 0xB405F031, 0xC4233EBA, 0xB3733979, 0xC0DD9D55, - 0xC51C28AE, 0xA327B8E1, 0x56C56167, 0xED614433, - 0x88B59D60, 0x60E2CEBA, 0x758B4B8B, 0x83E82A7F, - 0xBC968828, 0xE6E00BF7, 0xBA839E55, 0x9B491C60 -}; - -uint32_t shabal384_iv[] PROGMEM = { - /* A */ - 0xC8FCA331, 0xE55C504E, 0x003EBF26, 0xBB6B8D83, - 0x7B0448C1, 0x41B82789, 0x0A7C9601, 0x8D659CFF, - 0xB6E2673E, 0xCA54C77B, 0x1460FD7E, 0x3FCB8F2D, - /* B */ - 0x527291FC, 0x2A16455F, 0x78E627E5, 0x944F169F, - 0x1CA6F016, 0xA854EA25, 0x8DB98ABE, 0xF2C62641, - 0x30117DCB, 0xCF5C4309, 0x93711A25, 0xF9F671B8, - 0xB01D2116, 0x333F4B89, 0xB285D165, 0x86829B36, - /* C */ - 0xF764B11A, 0x76172146, 0xCEF6934D, 0xC6D28399, - 0xFE095F61, 0x5E6018B4, 0x5048ECF5, 0x51353261, - 0x6E6E36DC, 0x63130DAD, 0xA9C69BD6, 0x1E90EA0C, - 0x7C35073B, 0x28D95E6D, 0xAA340E0D, 0xCB3DEE70 -}; - -uint32_t shabal512_iv[] PROGMEM = { - /* A */ - 0x20728DFD, 0x46C0BD53, 
0xE782B699, 0x55304632, - 0x71B4EF90, 0x0EA9E82C, 0xDBB930F1, 0xFAD06B8B, - 0xBE0CAE40, 0x8BD14410, 0x76D2ADAC, 0x28ACAB7F, - /* B */ - 0xC1099CB7, 0x07B385F3, 0xE7442C26, 0xCC8AD640, - 0xEB6F56C7, 0x1EA81AA9, 0x73B9D314, 0x1DE85D08, - 0x48910A5A, 0x893B22DB, 0xC5A0DF44, 0xBBC4324E, - 0x72D2F240, 0x75941D99, 0x6D8BDE82, 0xA1A7502B, - /* C */ - 0xD9BF68D1, 0x58BAD750, 0x56028CB2, 0x8134F359, - 0xB5D469D8, 0x941A8CC2, 0x418B2A6E, 0x04052780, - 0x7F07D787, 0x5194358F, 0x3C60D665, 0xBE97D79A, - 0x950C3434, 0xAED9A06D, 0x2537DC8D, 0x7CDB5969, -}; - -void shabal192_init(shabal_ctx_t* ctx){ - uint8_t i; - ctx->b = ctx->b_buffer; - ctx->c = ctx->c_buffer; - ctx->w.w64 = 1LL; - for(i=0;ia[i] = pgm_read_dword(&(shabal192_iv[i])); - } - for(i=0;i<16;++i){ - ctx->b[i] = pgm_read_dword(&(shabal192_iv[SHABAL_R+i])); - } - for(i=0;i<16;++i){ - ctx->c[i] = pgm_read_dword(&(shabal192_iv[SHABAL_R+16+i])); - } -} - -void shabal224_init(shabal_ctx_t* ctx){ - uint8_t i; - ctx->b = ctx->b_buffer; - ctx->c = ctx->c_buffer; - ctx->w.w64 = 1LL; - for(i=0;ia[i] = pgm_read_dword(&(shabal224_iv[i])); - } - for(i=0;i<16;++i){ - ctx->b[i] = pgm_read_dword(&(shabal224_iv[SHABAL_R+i])); - } - for(i=0;i<16;++i){ - ctx->c[i] = pgm_read_dword(&(shabal224_iv[SHABAL_R+16+i])); - } -} -void shabal256_init(shabal_ctx_t* ctx){ - uint8_t i; - ctx->b = ctx->b_buffer; - ctx->c = ctx->c_buffer; - ctx->w.w64 = 1LL; - for(i=0;ia[i] = pgm_read_dword(&(shabal256_iv[i])); - } - for(i=0;i<16;++i){ - ctx->b[i] = pgm_read_dword(&(shabal256_iv[SHABAL_R+i])); - } - for(i=0;i<16;++i){ - ctx->c[i] = pgm_read_dword(&(shabal256_iv[SHABAL_R+16+i])); - } -} -void shabal384_init(shabal_ctx_t* ctx){ - uint8_t i; - ctx->b = ctx->b_buffer; - ctx->c = ctx->c_buffer; - ctx->w.w64 = 1LL; - for(i=0;ia[i] = pgm_read_dword(&(shabal384_iv[i])); - } - for(i=0;i<16;++i){ - ctx->b[i] = pgm_read_dword(&(shabal384_iv[SHABAL_R+i])); - } - for(i=0;i<16;++i){ - ctx->c[i] = pgm_read_dword(&(shabal384_iv[SHABAL_R+16+i])); - } -} -void 
shabal512_init(shabal_ctx_t* ctx){ - uint8_t i; - ctx->b = ctx->b_buffer; - ctx->c = ctx->c_buffer; - ctx->w.w64 = 1LL; - for(i=0;ia[i] = pgm_read_dword(&(shabal512_iv[i])); - } - for(i=0;i<16;++i){ - ctx->b[i] = pgm_read_dword(&(shabal512_iv[SHABAL_R+i])); - } - for(i=0;i<16;++i){ - ctx->c[i] = pgm_read_dword(&(shabal512_iv[SHABAL_R+16+i])); - } } void shabal_nextBlock(shabal_ctx_t* ctx, const void* block){ @@ -276,14 +104,8 @@ void shabal_lastBlock(shabal_ctx_t* ctx, const void* block, uint16_t length_b){ for(i=0;i<16;++i){ ctx->b[i] += ((uint32_t*)buffer)[i]; } - ctx->a[0] ^= ctx->w.w32[0]; - ctx->a[1] ^= ctx->w.w32[1]; - shabal_p(ctx, buffer); - t = ctx->c; - ctx->c = ctx->b; - ctx->b = t; - for(j=0; j<3;++j){ + for(j=0; j<4;++j){ ctx->a[0] ^= ctx->w.w32[0]; ctx->a[1] ^= ctx->w.w32[1]; shabal_p(ctx, buffer); @@ -294,87 +116,6 @@ void shabal_lastBlock(shabal_ctx_t* ctx, const void* block, uint16_t length_b){ } -static inline void shabal_ctx2hash(void* dest, const shabal_ctx_t* ctx, uint16_t outlength_b){ memcpy(dest, &(ctx->c[16-outlength_b/32]), outlength_b/8); } - -void shabal192_ctx2hash(void* dest, const shabal_ctx_t* ctx){ - shabal_ctx2hash(dest, ctx, 192); -} - -void shabal224_ctx2hash(void* dest, const shabal_ctx_t* ctx){ - shabal_ctx2hash(dest, ctx, 224); -} - -void shabal256_ctx2hash(void* dest, const shabal_ctx_t* ctx){ - shabal_ctx2hash(dest, ctx, 256); -} - -void shabal384_ctx2hash(void* dest, const shabal_ctx_t* ctx){ - shabal_ctx2hash(dest, ctx, 384); -} - -void shabal512_ctx2hash(void* dest, const shabal_ctx_t* ctx){ - shabal_ctx2hash(dest, ctx, 512); -} - -void shabal192(void* dest, void* msg, uint32_t length_b){ - shabal_ctx_t ctx; - shabal192_init(&ctx); - while(length_b>=SHABAL_BLOCKSIZE){ - shabal_nextBlock(&ctx, msg); - msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; - length_b -= SHABAL_BLOCKSIZE; - } - shabal_lastBlock(&ctx, msg, length_b); - shabal192_ctx2hash(dest, &ctx); -} - -void shabal224(void* dest, void* msg, uint32_t length_b){ - 
shabal_ctx_t ctx; - shabal224_init(&ctx); - while(length_b>=SHABAL_BLOCKSIZE){ - shabal_nextBlock(&ctx, msg); - msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; - length_b -= SHABAL_BLOCKSIZE; - } - shabal_lastBlock(&ctx, msg, length_b); - shabal224_ctx2hash(dest, &ctx); -} - -void shabal256(void* dest, void* msg, uint32_t length_b){ - shabal_ctx_t ctx; - shabal256_init(&ctx); - while(length_b>=SHABAL_BLOCKSIZE){ - shabal_nextBlock(&ctx, msg); - msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; - length_b -= SHABAL_BLOCKSIZE; - } - shabal_lastBlock(&ctx, msg, length_b); - shabal256_ctx2hash(dest, &ctx); -} - -void shabal384(void* dest, void* msg, uint32_t length_b){ - shabal_ctx_t ctx; - shabal384_init(&ctx); - while(length_b>=SHABAL_BLOCKSIZE){ - shabal_nextBlock(&ctx, msg); - msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; - length_b -= SHABAL_BLOCKSIZE; - } - shabal_lastBlock(&ctx, msg, length_b); - shabal384_ctx2hash(dest, &ctx); -} - -void shabal512(void* dest, void* msg, uint32_t length_b){ - shabal_ctx_t ctx; - shabal512_init(&ctx); - while(length_b>=SHABAL_BLOCKSIZE){ - shabal_nextBlock(&ctx, msg); - msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; - length_b -= SHABAL_BLOCKSIZE; - } - shabal_lastBlock(&ctx, msg, length_b); - shabal512_ctx2hash(dest, &ctx); -} diff --git a/shabal.h b/shabal.h index 5a9f921..fab23a1 100644 --- a/shabal.h +++ b/shabal.h @@ -38,13 +38,13 @@ typedef struct{ union{ - uint64_t w64; - uint32_t w32[2]; + uint64_t w64; + uint32_t w32[2]; } w; /* the counter */ - uint32_t a[SHABAL_R]; uint32_t *b; - uint32_t b_buffer[16]; uint32_t *c; + uint32_t a[SHABAL_R]; + uint32_t b_buffer[16]; uint32_t c_buffer[16]; }shabal_ctx_t; @@ -70,4 +70,6 @@ void shabal256(void* dest, void* msg, uint32_t length_b); void shabal384(void* dest, void* msg, uint32_t length_b); void shabal512(void* dest, void* msg, uint32_t length_b); +void shabal_ctx2hash(void* dest, const shabal_ctx_t* ctx, uint16_t outlength_b); + #endif /* SHABAL_H_ */ diff --git a/shabal192-asm.S b/shabal192-asm.S new file 
mode 100644 index 0000000..09b41d5 --- /dev/null +++ b/shabal192-asm.S @@ -0,0 +1,191 @@ +/* shabal192-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + * \file shabal192-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +shabal192_iv: + /* A */ +.long 0xFD749ED4, 0xB798E530, 0x33904B6F, 0x46BDA85E +.long 0x076934B4, 0x454B4058, 0x77F74527, 0xFB4CF465 +.long 0x62931DA9, 0xE778C8DB, 0x22B3998E, 0xAC15CFB9 + /* B */ +.long 0x58BCBAC4, 0xEC47A08E, 0xAEE933B2, 0xDFCBC824 +.long 0xA7944804, 0xBF65BDB0, 0x5A9D4502, 0x59979AF7 +.long 0xC5CEA54E, 0x4B6B8150, 0x16E71909, 0x7D632319 +.long 0x930573A0, 0xF34C63D1, 0xCAF914B4, 0xFDD6612C + /* C */ +.long 0x61550878, 0x89EF2B75, 0xA1660C46, 0x7EF3855B +.long 0x7297B58C, 0x1BC67793, 0x7FB1C723, 0xB66FC640 +.long 0x1A48B71C, 0xF0976D17, 0x088CE80A, 0xA454EDF3 +.long 0x1C096BF4, 0xAC76224B, 0x5215781C, 0xCD5D2669 + + +/******************************************************************************/ +/* +void shabal192_init(shabal_ctx_t* ctx){ + uint8_t i; + ctx->b = ctx->b_buffer; + ctx->c = ctx->c_buffer; + ctx->w.w64 = 1LL; + for(i=0;i<SHABAL_R;++i){ + ctx->a[i] = pgm_read_dword(&(shabal192_iv[i])); + } + for(i=0;i<16;++i){ + ctx->b[i] = pgm_read_dword(&(shabal192_iv[SHABAL_R+i])); + } + 
for(i=0;i<16;++i){ + ctx->c[i] = pgm_read_dword(&(shabal192_iv[SHABAL_R+16+i])); + } +} +*/ +/* + * param ctx: r24,r25 + */ +.global shabal192_init +shabal192_init: + movw r26, r24 + ldi r24, 1 + st X+, r24 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + movw r24, r26 + adiw r24, 12*4+2+2 + st X+, r24 + st X+, r25 + adiw r24, 4*16-1 + adiw r24, 1 + st X+, r24 + st X+, r25 + ldi r24, (12+16+16) + ldi r30, lo8(shabal192_iv) + ldi r31, hi8(shabal192_iv) +1: lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* + * param dest: r24:r25 + * param ctx: r22:r23 + */ +.global shabal192_ctx2hash +shabal192_ctx2hash: + movw r26, r24 + movw r30, r22 + ldd r24, Z+(8+2) + ldd r25, Z+(8+2+1) + movw r30, r24 + adiw r30, (16-192/32)*4 + ldi r24, 192/8 +1: ld r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* +void shabal192(void* dest, void* msg, uint32_t length_b){ + shabal_ctx_t ctx; + shabal192_init(&ctx); + while(length_b>=SHABAL_BLOCKSIZE){ + shabal_nextBlock(&ctx, msg); + msg = (uint8_t*)msg+SHABAL_BLOCKSIZE_B; + length_b -= SHABAL_BLOCKSIZE; + } + shabal_lastBlock(&ctx, msg, length_b); + shabal192_ctx2hash(dest, &ctx); +} +*/ +/* + * param dest: r24:r25 + * param msg: r22:r23 + * param length_b: r18:r21 + */ +DST0 = 8 +DST1 = 9 +CTX0 = 10 +CTX1 = 11 +MSG0 = 12 +MSG1 = 13 +LEN2 = 14 +LEN3 = 15 +LEN0 = 16 +LEN1 = 17 +.global shabal192 +shabal192: + push_range 8, 17 + stack_alloc_large 188 + adiw r30, 1 /* Z points to stack space (ctx) */ + movw CTX0, r30 + movw DST0, r24 + movw MSG0, r22 + movw LEN0, r18 + movw LEN2, r20 + movw r24, r30 + rcall shabal192_init +2: + tst LEN2 + brne 3f + tst LEN3 + breq 4f +3: + movw r24, CTX0 + movw r22, MSG0 + rcall shabal_nextBlock + subi LEN1, 0x02 + sbc LEN2, r1 + sbc 
LEN3, r1 + ldi r18, 64 + add MSG0, r18 + adc MSG1, r1 + rjmp 2b +4: + movw r24, CTX0 + movw r22, MSG0 + movw r20, LEN0 + rcall shabal_lastBlock + movw r24, DST0 + movw r22, CTX0 + rcall shabal192_ctx2hash + stack_free_large2 188 + pop_range 8, 17 + ret + + diff --git a/shabal224-asm.S b/shabal224-asm.S new file mode 100644 index 0000000..1168d1b --- /dev/null +++ b/shabal224-asm.S @@ -0,0 +1,160 @@ +/* shabal224-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \file shabal224-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ + +shabal224_iv: + /* A */ +.long 0xA5201467, 0xA9B8D94A, 0xD4CED997, 0x68379D7B +.long 0xA7FC73BA, 0xF1A2546B, 0x606782BF, 0xE0BCFD0F +.long 0x2F25374E, 0x069A149F, 0x5E2DFF25, 0xFAECF061 + /* B */ +.long 0xEC9905D8, 0xF21850CF, 0xC0A746C8, 0x21DAD498 +.long 0x35156EEB, 0x088C97F2, 0x26303E40, 0x8A2D4FB5 +.long 0xFEEE44B6, 0x8A1E9573, 0x7B81111A, 0xCBC139F0 +.long 0xA3513861, 0x1D2C362E, 0x918C580E, 0xB58E1B9C + /* C */ +.long 0xE4B573A1, 0x4C1A0880, 0x1E907C51, 0x04807EFD +.long 0x3AD8CDE5, 0x16B21302, 0x02512C53, 0x2204CB18 +.long 0x99405F2D, 0xE5B648A1, 0x70AB1D43, 0xA10C25C2 +.long 0x16F1AC05, 0x38BBEB56, 0x9B01DC60, 0xB1096D83 + + +/******************************************************************************/ +/* + * param ctx: r24,r25 + */ +.global shabal224_init +shabal224_init: + movw r26, r24 + ldi r24, 1 + st X+, r24 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + movw r24, r26 + adiw r24, 12*4+4 + st X+, r24 + st X+, r25 + adiw r24, 4*16-1 + adiw r24, 1 + st X+, r24 + st X+, r25 + ldi r24, (12+16+16) + ldi r30, lo8(shabal224_iv) + ldi r31, hi8(shabal224_iv) +1: lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ + +.global shabal224_ctx2hash +shabal224_ctx2hash: + movw r26, r24 + movw r30, r22 + ldd r24, Z+(8+2) + ldd r25, Z+(8+2+1) + movw r30, r24 + adiw r30, (16-224/32)*4 + ldi r24, 224/8 +1: ld r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* + * param dest: r24:r25 + * param msg: r22:r23 + * param length_b: r18:r21 + */ 
+DST0 = 8 +DST1 = 9 +CTX0 = 10 +CTX1 = 11 +MSG0 = 12 +MSG1 = 13 +LEN2 = 14 +LEN3 = 15 +LEN0 = 16 +LEN1 = 17 +.global shabal224 +shabal224: + push_range 8, 17 + stack_alloc_large 188 + adiw r30, 1 /* Z points to stack space (ctx) */ + movw CTX0, r30 + movw DST0, r24 + movw MSG0, r22 + movw LEN0, r18 + movw LEN2, r20 + movw r24, r30 + rcall shabal224_init +2: + tst LEN2 + brne 3f + tst LEN3 + breq 4f +3: + movw r24, CTX0 + movw r22, MSG0 + rcall shabal_nextBlock + subi LEN1, 0x02 + sbc LEN2, r1 + sbc LEN3, r1 + ldi r18, 64 + add MSG0, r18 + adc MSG1, r1 + rjmp 2b +4: + movw r24, CTX0 + movw r22, MSG0 + movw r20, LEN0 + rcall shabal_lastBlock + movw r24, DST0 + movw r22, CTX0 + rcall shabal224_ctx2hash + stack_free_large2 188 + pop_range 8, 17 + ret + + diff --git a/shabal256-asm.S b/shabal256-asm.S new file mode 100644 index 0000000..f95e6c7 --- /dev/null +++ b/shabal256-asm.S @@ -0,0 +1,159 @@ +/* shabal256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \file shabal256-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ + +shabal256_iv: /* 12+16+16 = 44 words; shabal256_init copies them from program memory into the context */ + /* A (12 words) */ +.long 0x52F84552, 0xE54B7999, 0x2D8EE3EC, 0xB9645191 +.long 0xE0078B86, 0xBB7C44C9, 0xD2B5C1CA, 0xB0D2EB8C +.long 0x14CE5A45, 0x22AF50DC, 0xEFFDBC6B, 0xEB21B74A + /* B (16 words) */ +.long 0xB555C6EE, 0x3E710596, 0xA72A652F, 0x9301515F +.long 0xDA28C1FA, 0x696FD868, 0x9CB6BF72, 0x0AFE4002 +.long 0xA6E03615, 0x5138C1D4, 0xBE216306, 0xB38B8890 +.long 0x3EA8B96B, 0x3299ACE4, 0x30924DD4, 0x55CB34A5 + /* C (16 words) */ +.long 0xB405F031, 0xC4233EBA, 0xB3733979, 0xC0DD9D55 +.long 0xC51C28AE, 0xA327B8E1, 0x56C56167, 0xED614433 +.long 0x88B59D60, 0x60E2CEBA, 0x758B4B8B, 0x83E82A7F +.long 0xBC968828, 0xE6E00BF7, 0xBA839E55, 0x9B491C60 + +/******************************************************************************/ +/* + * param ctx: r24,r25 (pointer to the context to initialise; bytes 0..187 of it are written) + */ +.global shabal256_init +shabal256_init: + movw r26, r24 /* X = ctx */ + ldi r24, 1 + st X+, r24 /* ctx[0] = 1 */ + st X+, r1 /* ctx[1..7] = r1, which is expected to hold 0 (avr-gcc zero-register convention); presumably a 64-bit counter starting at 1 -- confirm against shabal.h */ + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + movw r24, r26 /* r25:r24 = ctx+8 */ + adiw r24, 12*4+4 /* ctx+60: past the two pointer slots (4 bytes) and the 12 A words */ + st X+, r24 /* pointer slot at ctx+8 = address of the B words */ + st X+, r25 + adiw r24, 4*16-1 /* advance by 64 bytes in two steps: adiw only accepts immediates 0..63 */ + adiw r24, 1 + st X+, r24 /* pointer slot at ctx+10 = address of the C words */ + st X+, r25 + ldi r24, (12+16+16) /* loop counter: number of 32-bit words in shabal256_iv */ + ldi r30, lo8(shabal256_iv) /* Z = address of the IV table in program memory */ + ldi r31, hi8(shabal256_iv) +1: lpm r0, Z+ /* copy one word (4 bytes) per iteration, filling ctx+12 onwards */ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ + +.global shabal256_ctx2hash /* arguments as used below: r24:r25 = destination buffer, r22:r23 = context */ +shabal256_ctx2hash: + movw r26, r24 /* X = destination */ + movw r30, r22 /* Z = context */ + ldd r24, Z+(8+2) /* load the pointer stored at offset 10 of the context */ + ldd r25, Z+(8+2+1) + movw r30, r24 /* Z = that pointer */ + adiw r30, (16-256/32)*4 /* skip the first 8 words (32 bytes) so that the following 8 words are copied */ + ldi r24, 256/8 /* 32 bytes = 256 bits to copy */ +1: ld r0, Z+ /* byte-wise copy to the destination */ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* + * param dest: r24:r25 + * param msg: r22:r23 + * param length_b: r18:r21 (message length in bits, r18 = least significant byte) + */ 
+DST0 = 8 /* register numbers that hold the arguments across the calls below */ +DST1 = 9 +CTX0 = 10 +CTX1 = 11 +MSG0 = 12 +MSG1 = 13 +LEN2 = 14 +LEN3 = 15 +LEN0 = 16 +LEN1 = 17 +.global shabal256 +shabal256: + push_range 8, 17 /* macro from avr-asm-macros.S; presumably saves r8..r17 -- confirm */ + stack_alloc_large 188 /* macro from avr-asm-macros.S; 188 bytes is the amount of context that shabal256_init writes */ + adiw r30, 1 /* Z points to stack space (ctx) */ + movw CTX0, r30 + movw DST0, r24 + movw MSG0, r22 + movw LEN0, r18 /* LEN1:LEN0 = low 16 bits of length_b */ + movw LEN2, r20 /* LEN3:LEN2 = high 16 bits of length_b */ + movw r24, r30 + rcall shabal256_init /* shabal256_init(ctx) */ +2: /* loop for as long as the high 16 bits of the remaining length are non-zero */ + tst LEN2 + brne 3f + tst LEN3 + breq 4f +3: + movw r24, CTX0 + movw r22, MSG0 + rcall shabal_nextBlock /* external routine, called with (ctx, msg) */ + subi LEN1, 0x02 /* remaining length -= 0x200 (512 bits) */ + sbc LEN2, r1 /* borrow propagation; r1 is expected to hold 0 */ + sbc LEN3, r1 + ldi r18, 64 /* msg += 64 bytes */ + add MSG0, r18 + adc MSG1, r1 + rjmp 2b +4: /* fewer than 65536 bits remain */ + movw r24, CTX0 + movw r22, MSG0 + movw r20, LEN0 /* third argument: low 16 bits of the remaining length */ + rcall shabal_lastBlock /* NOTE(review): may be handed more than 512 bits; presumably shabal_lastBlock copes with that -- confirm */ + movw r24, DST0 + movw r22, CTX0 + rcall shabal256_ctx2hash /* shabal256_ctx2hash(dest, ctx) */ + stack_free_large2 188 /* macro from avr-asm-macros.S; presumably releases the 188 bytes allocated above -- confirm */ + pop_range 8, 17 + ret + + diff --git a/shabal384-asm.S b/shabal384-asm.S new file mode 100644 index 0000000..e9a053d --- /dev/null +++ b/shabal384-asm.S @@ -0,0 +1,158 @@ +/* shabal384-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \file shabal384-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ + +shabal384_iv: /* 12+16+16 = 44 words; shabal384_init copies them from program memory into the context */ + /* A (12 words) */ +.long 0xC8FCA331, 0xE55C504E, 0x003EBF26, 0xBB6B8D83 +.long 0x7B0448C1, 0x41B82789, 0x0A7C9601, 0x8D659CFF +.long 0xB6E2673E, 0xCA54C77B, 0x1460FD7E, 0x3FCB8F2D + /* B (16 words) */ +.long 0x527291FC, 0x2A16455F, 0x78E627E5, 0x944F169F +.long 0x1CA6F016, 0xA854EA25, 0x8DB98ABE, 0xF2C62641 +.long 0x30117DCB, 0xCF5C4309, 0x93711A25, 0xF9F671B8 +.long 0xB01D2116, 0x333F4B89, 0xB285D165, 0x86829B36 + /* C (16 words) */ +.long 0xF764B11A, 0x76172146, 0xCEF6934D, 0xC6D28399 +.long 0xFE095F61, 0x5E6018B4, 0x5048ECF5, 0x51353261 +.long 0x6E6E36DC, 0x63130DAD, 0xA9C69BD6, 0x1E90EA0C +.long 0x7C35073B, 0x28D95E6D, 0xAA340E0D, 0xCB3DEE70 + +/******************************************************************************/ +/* + * param ctx: r24,r25 (pointer to the context to initialise; bytes 0..187 of it are written) + */ +.global shabal384_init +shabal384_init: + movw r26, r24 /* X = ctx */ + ldi r24, 1 + st X+, r24 /* ctx[0] = 1 */ + st X+, r1 /* ctx[1..7] = r1, which is expected to hold 0 (avr-gcc zero-register convention); presumably a 64-bit counter starting at 1 -- confirm against shabal.h */ + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + movw r24, r26 /* r25:r24 = ctx+8 */ + adiw r24, 12*4+4 /* ctx+60: past the two pointer slots (4 bytes) and the 12 A words */ + st X+, r24 /* pointer slot at ctx+8 = address of the B words */ + st X+, r25 + adiw r24, 4*16-1 /* advance by 64 bytes in two steps: adiw only accepts immediates 0..63 */ + adiw r24, 1 + st X+, r24 /* pointer slot at ctx+10 = address of the C words */ + st X+, r25 + ldi r24, (12+16+16) /* loop counter: number of 32-bit words in shabal384_iv */ + ldi r30, lo8(shabal384_iv) /* Z = address of the IV table in program memory */ + ldi r31, hi8(shabal384_iv) +1: lpm r0, Z+ /* copy one word (4 bytes) per iteration, filling ctx+12 onwards */ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ + +.global shabal384_ctx2hash /* arguments as used below: r24:r25 = destination buffer, r22:r23 = context */ +shabal384_ctx2hash: + movw r26, r24 /* X = destination */ + movw r30, r22 /* Z = context */ + ldd r24, Z+(8+2) /* load the pointer stored at offset 10 of the context */ + ldd r25, Z+(8+2+1) + movw r30, r24 /* Z = that pointer */ + adiw r30, (16-384/32)*4 /* skip the first 4 words (16 bytes) so that the following 12 words are copied */ + ldi r24, 384/8 /* 48 bytes = 384 bits to copy */ +1: ld r0, Z+ /* byte-wise copy to the destination */ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* + * param dest: r24:r25 + * param msg: r22:r23 + * param length_b: r18:r21 (message length in bits, r18 = least significant byte) + */ 
+DST0 = 8 /* register numbers that hold the arguments across the calls below */ +DST1 = 9 +CTX0 = 10 +CTX1 = 11 +MSG0 = 12 +MSG1 = 13 +LEN2 = 14 +LEN3 = 15 +LEN0 = 16 +LEN1 = 17 +.global shabal384 +shabal384: + push_range 8, 17 /* macro from avr-asm-macros.S; presumably saves r8..r17 -- confirm */ + stack_alloc_large 188 /* macro from avr-asm-macros.S; 188 bytes is the amount of context that shabal384_init writes */ + adiw r30, 1 /* Z points to stack space (ctx) */ + movw CTX0, r30 + movw DST0, r24 + movw MSG0, r22 + movw LEN0, r18 /* LEN1:LEN0 = low 16 bits of length_b */ + movw LEN2, r20 /* LEN3:LEN2 = high 16 bits of length_b */ + movw r24, r30 + rcall shabal384_init /* shabal384_init(ctx) */ +2: /* loop for as long as the high 16 bits of the remaining length are non-zero */ + tst LEN2 + brne 3f + tst LEN3 + breq 4f +3: + movw r24, CTX0 + movw r22, MSG0 + rcall shabal_nextBlock /* external routine, called with (ctx, msg) */ + subi LEN1, 0x02 /* remaining length -= 0x200 (512 bits) */ + sbc LEN2, r1 /* borrow propagation; r1 is expected to hold 0 */ + sbc LEN3, r1 + ldi r18, 64 /* msg += 64 bytes */ + add MSG0, r18 + adc MSG1, r1 + rjmp 2b +4: /* fewer than 65536 bits remain */ + movw r24, CTX0 + movw r22, MSG0 + movw r20, LEN0 /* third argument: low 16 bits of the remaining length */ + rcall shabal_lastBlock /* NOTE(review): may be handed more than 512 bits; presumably shabal_lastBlock copes with that -- confirm */ + movw r24, DST0 + movw r22, CTX0 + rcall shabal384_ctx2hash /* shabal384_ctx2hash(dest, ctx) */ + stack_free_large2 188 /* macro from avr-asm-macros.S; presumably releases the 188 bytes allocated above -- confirm */ + pop_range 8, 17 + ret + diff --git a/shabal512-asm.S b/shabal512-asm.S new file mode 100644 index 0000000..0d412ff --- /dev/null +++ b/shabal512-asm.S @@ -0,0 +1,158 @@ +/* shabal512-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ +/* + * \file shabal512-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + */ + +#include "avr-asm-macros.S" + +/******************************************************************************/ + +shabal512_iv: /* 12+16+16 = 44 words; shabal512_init copies them from program memory into the context */ + /* A (12 words) */ +.long 0x20728DFD, 0x46C0BD53, 0xE782B699, 0x55304632 +.long 0x71B4EF90, 0x0EA9E82C, 0xDBB930F1, 0xFAD06B8B +.long 0xBE0CAE40, 0x8BD14410, 0x76D2ADAC, 0x28ACAB7F + /* B (16 words) */ +.long 0xC1099CB7, 0x07B385F3, 0xE7442C26, 0xCC8AD640 +.long 0xEB6F56C7, 0x1EA81AA9, 0x73B9D314, 0x1DE85D08 +.long 0x48910A5A, 0x893B22DB, 0xC5A0DF44, 0xBBC4324E +.long 0x72D2F240, 0x75941D99, 0x6D8BDE82, 0xA1A7502B + /* C (16 words) */ +.long 0xD9BF68D1, 0x58BAD750, 0x56028CB2, 0x8134F359 +.long 0xB5D469D8, 0x941A8CC2, 0x418B2A6E, 0x04052780 +.long 0x7F07D787, 0x5194358F, 0x3C60D665, 0xBE97D79A +.long 0x950C3434, 0xAED9A06D, 0x2537DC8D, 0x7CDB5969 + +/******************************************************************************/ +/* + * param ctx: r24,r25 (pointer to the context to initialise; bytes 0..187 of it are written) + */ +.global shabal512_init +shabal512_init: + movw r26, r24 /* X = ctx */ + ldi r24, 1 + st X+, r24 /* ctx[0] = 1 */ + st X+, r1 /* ctx[1..7] = r1, which is expected to hold 0 (avr-gcc zero-register convention); presumably a 64-bit counter starting at 1 -- confirm against shabal.h */ + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + st X+, r1 + movw r24, r26 /* r25:r24 = ctx+8 */ + adiw r24, 12*4+4 /* ctx+60: past the two pointer slots (4 bytes) and the 12 A words */ + st X+, r24 /* pointer slot at ctx+8 = address of the B words */ + st X+, r25 + adiw r24, 4*16-1 /* advance by 64 bytes in two steps: adiw only accepts immediates 0..63 */ + adiw r24, 1 + st X+, r24 /* pointer slot at ctx+10 = address of the C words */ + st X+, r25 + ldi r24, (12+16+16) /* loop counter: number of 32-bit words in shabal512_iv */ + ldi r30, lo8(shabal512_iv) /* Z = address of the IV table in program memory */ + ldi r31, hi8(shabal512_iv) +1: lpm r0, Z+ /* copy one word (4 bytes) per iteration, filling ctx+12 onwards */ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + lpm r0, Z+ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ + +.global shabal512_ctx2hash /* arguments as used below: r24:r25 = destination buffer, r22:r23 = context */ +shabal512_ctx2hash: + movw r26, r24 /* X = destination */ + movw r30, r22 /* Z = context */ + ldd r24, Z+(8+2) /* load the pointer stored at offset 10 of the context */ + ldd r25, Z+(8+2+1) + movw r30, r24 /* Z = that pointer; the line below is disabled because its offset (16-512/32)*4 evaluates to 0 */ + ; adiw r30, (16-512/32)*4 + ldi r24, 512/8 /* 64 bytes = 512 bits to copy, i.e. all 16 words */ +1: ld r0, Z+ /* byte-wise copy to the destination */ + st X+, r0 + dec r24 + brne 1b + ret + +/******************************************************************************/ +/* + * param dest: r24:r25 + * param msg: r22:r23 + * param length_b: r18:r21 (message length in bits, r18 = least significant byte) + */ 
+DST0 = 8 /* register numbers that hold the arguments across the calls below */ +DST1 = 9 +CTX0 = 10 +CTX1 = 11 +MSG0 = 12 +MSG1 = 13 +LEN2 = 14 +LEN3 = 15 +LEN0 = 16 +LEN1 = 17 +.global shabal512 +shabal512: + push_range 8, 17 /* macro from avr-asm-macros.S; presumably saves r8..r17 -- confirm */ + stack_alloc_large 188 /* macro from avr-asm-macros.S; 188 bytes is the amount of context that shabal512_init writes */ + adiw r30, 1 /* Z points to stack space (ctx) */ + movw CTX0, r30 + movw DST0, r24 + movw MSG0, r22 + movw LEN0, r18 /* LEN1:LEN0 = low 16 bits of length_b */ + movw LEN2, r20 /* LEN3:LEN2 = high 16 bits of length_b */ + movw r24, r30 + rcall shabal512_init /* shabal512_init(ctx) */ +2: /* loop for as long as the high 16 bits of the remaining length are non-zero */ + tst LEN2 + brne 3f + tst LEN3 + breq 4f +3: + movw r24, CTX0 + movw r22, MSG0 + rcall shabal_nextBlock /* external routine, called with (ctx, msg) */ + subi LEN1, 0x02 /* remaining length -= 0x200 (512 bits) */ + sbc LEN2, r1 /* borrow propagation; r1 is expected to hold 0 */ + sbc LEN3, r1 + ldi r18, 64 /* msg += 64 bytes */ + add MSG0, r18 + adc MSG1, r1 + rjmp 2b +4: /* fewer than 65536 bits remain */ + movw r24, CTX0 + movw r22, MSG0 + movw r20, LEN0 /* third argument: low 16 bits of the remaining length */ + rcall shabal_lastBlock /* NOTE(review): may be handed more than 512 bits; presumably shabal_lastBlock copes with that -- confirm */ + movw r24, DST0 + movw r22, CTX0 + rcall shabal512_ctx2hash /* shabal512_ctx2hash(dest, ctx) */ + stack_free_large2 188 /* macro from avr-asm-macros.S; presumably releases the 188 bytes allocated above -- confirm */ + pop_range 8, 17 + ret + diff --git a/test_src/main-shabal-test.c b/test_src/main-shabal-test.c index 3e8a9d6..c103529 100644 --- a/test_src/main-shabal-test.c +++ b/test_src/main-shabal-test.c @@ -123,6 +123,86 @@ void testrun_stdtest_shabal(void){ testrun_stdtest_shabal512(mb, strlen(mb)*8); } +void testshort(void){ /* calls testrun_stdtest_shabal192 on a 64-byte all-zero message, with the length given in bits */ + uint8_t ma[64]; + memset(ma, 0, 64); + testrun_stdtest_shabal192(ma, 64*8); +} + +void shabal_ctx_dump(shabal_ctx_t* ctx){ /* debug helper: prints sizeof(shabal_ctx_t), the contents of a, b_buffer and c_buffer, and where the b and c pointers point */ + uint8_t i; + void* p; + cli_putstr_P(PSTR("\r\n=== shabal ctx dump ===\r\n size = ")); + i=sizeof(shabal_ctx_t); /* printed below in decimal without leading zeros */ + if(i>=100) + cli_putc('0'+i/100); + if(i>=10) + cli_putc('0'+(i/10)%10); + cli_putc('0'+i%10); + cli_putstr_P(PSTR("\r\n a = ")); + cli_hexdump_block(ctx->a, 12*4, 5, 4*8); /* a holds 12 words */ + cli_putstr_P(PSTR("\r\n b_buffer = ")); + cli_hexdump_block(ctx->b_buffer, 16*4, 5, 4*8); /* b_buffer holds 16 words (the init routines copy 12+16+16 words), so dump all 64 bytes */ + cli_putstr_P(PSTR("\r\n c_buffer = ")); + cli_hexdump_block(ctx->c_buffer, 16*4, 5, 4*8); /* c_buffer holds 16 words as well; its last words are the ones the ctx2hash routines read */ + if(ctx->b == &(ctx->b_buffer[0])) + cli_putstr_P(PSTR("\r\nb --> b_buffer")); + if(ctx->b == &(ctx->c_buffer[0])) + cli_putstr_P(PSTR("\r\nb --> c_buffer")); + if(ctx->c == &(ctx->b_buffer[0])) + cli_putstr_P(PSTR("\r\nc --> b_buffer")); + if(ctx->c == &(ctx->c_buffer[0])) + cli_putstr_P(PSTR("\r\nc --> c_buffer")); + cli_putstr_P(PSTR("\r\n 
b = ")); + cli_hexdump(&(ctx->b), 2); /* the two bytes of the stored pointer value */ + p = ctx->b_buffer; /* address of b_buffer, printed for comparison with the stored pointer */ + cli_putstr_P(PSTR("\r\n b (should) = ")); + cli_hexdump(&p, 2); + cli_putstr_P(PSTR("\r\n c = ")); + cli_hexdump(&(ctx->c), 2); /* the two bytes of the stored pointer value */ + p = ctx->c_buffer; /* address of c_buffer, printed for comparison with the stored pointer */ + cli_putstr_P(PSTR("\r\n c (should) = ")); + cli_hexdump(&p, 2); +} + + +void testinit_192(void){ /* initialise a fresh context with shabal192_init and dump it */ + shabal_ctx_t ctx; + shabal192_init(&ctx); + shabal_ctx_dump(&ctx); +} + +void testinit_224(void){ /* initialise a fresh context with shabal224_init and dump it */ + shabal_ctx_t ctx; + shabal224_init(&ctx); + shabal_ctx_dump(&ctx); +} + +void testinit_256(void){ /* initialise a fresh context with shabal256_init and dump it */ + shabal_ctx_t ctx; + shabal256_init(&ctx); + shabal_ctx_dump(&ctx); +} + +void testinit_384(void){ /* initialise a fresh context with shabal384_init and dump it */ + shabal_ctx_t ctx; + shabal384_init(&ctx); + shabal_ctx_dump(&ctx); +} + +void testinit_512(void){ /* initialise a fresh context with shabal512_init and dump it */ + shabal_ctx_t ctx; + shabal512_init(&ctx); + shabal_ctx_dump(&ctx); +} +void testinit(void){ /* run the init-and-dump test for all five output sizes */ + testinit_192(); + testinit_224(); + testinit_256(); + testinit_384(); + testinit_512(); +} + void performance_shabal(void){ uint64_t t; char str[16]; @@ -143,28 +223,28 @@ void performance_shabal(void){ cli_putstr(str); startTimer(1); - shabal192_init(&ctx); + shabal224_init(&ctx); t = stopTimer(); cli_putstr_P(PSTR("\r\n\tctx-gen time (224): ")); ultoa((unsigned long)t, str, 10); cli_putstr(str); startTimer(1); - shabal192_init(&ctx); + shabal256_init(&ctx); t = stopTimer(); cli_putstr_P(PSTR("\r\n\tctx-gen time (256): ")); ultoa((unsigned long)t, str, 10); cli_putstr(str); startTimer(1); - shabal192_init(&ctx); + shabal384_init(&ctx); t = stopTimer(); cli_putstr_P(PSTR("\r\n\tctx-gen time (384): ")); ultoa((unsigned long)t, str, 10); cli_putstr(str); startTimer(1); - shabal192_init(&ctx); + shabal512_init(&ctx); t = stopTimer(); cli_putstr_P(PSTR("\r\n\tctx-gen time (512): ")); ultoa((unsigned long)t, str, 10); @@ -281,6 +361,9 @@ const hfdesc_t* algolist[] PROGMEM = { const char nessie_str[] PROGMEM = "nessie"; const char test_str[] PROGMEM = "test"; +const char testinit192_str[] PROGMEM = "testinit192"; +const char testinit_str[] PROGMEM = "testinit"; +const char 
testshort_str[] PROGMEM = "short"; const char ztest_str[] PROGMEM = "zerotest"; const char performance_str[] PROGMEM = "performance"; const char echo_str[] PROGMEM = "echo"; @@ -291,6 +374,9 @@ const char shavs_test1_str[] PROGMEM = "shavs_test1"; cmdlist_entry_t cmdlist[] PROGMEM = { { nessie_str, NULL, testrun_nessie_shabal}, { test_str, NULL, testrun_stdtest_shabal}, + { testinit192_str, NULL, testinit_192}, + { testinit_str, NULL, testinit}, + { testshort_str, NULL, testshort}, { performance_str, NULL, performance_shabal}, { shavs_list_str, NULL, shavs_listalgos}, { shavs_set_str, (void*)1, (void_fpt)shavs_setalgo}, diff --git a/test_src/main-skein-test.c b/test_src/main-skein-test.c index 98102d8..4392314 100644 --- a/test_src/main-skein-test.c +++ b/test_src/main-skein-test.c @@ -174,6 +174,10 @@ void zeromsg_test_common(char* p){ } } +void performance_skein(void){ /* empty stub: performs no measurement; registered under performance_str in cmdlist below */ +} + + /***************************************************************************** * main * *****************************************************************************/ @@ -215,6 +219,7 @@ const char shavs_test1_str[] PROGMEM = "shavs_test1"; cmdlist_entry_t cmdlist[] PROGMEM = { // { nessie_str, NULL, testrun_nessie_skein}, + { performance_str, NULL, performance_skein}, { test_str, NULL, testrun_stdtest_skein}, { ztest_str, (void*)1, (void_fpt)zeromsg_test_common}, { shavs_list_str, NULL, shavs_listalgos},