From d32eba56ce10ea6b9eff123b50d9842673b38f2b Mon Sep 17 00:00:00 2001 From: bg Date: Thu, 6 Aug 2009 20:11:02 +0000 Subject: [PATCH] modified structure --- arcfour-asm.S => arcfour/arcfour-asm.S | 0 arcfour.c => arcfour/arcfour.c | 0 arcfour.h => arcfour/arcfour.h | 0 base64_dec.c => base64/base64_dec.c | 0 base64_dec.h => base64/base64_dec.h | 0 base64_enc.c => base64/base64_enc.c | 0 base64_enc.h => base64/base64_enc.h | 0 bmw_large.c => bmw/bmw_large.c | 0 bmw_large.h => bmw/bmw_large.h | 0 bmw_small.c => bmw/bmw_small.c | 0 bmw_small.h => bmw/bmw_small.h | 0 cast5-sbox.h => cast5/cast5-sbox.h | 0 cast5.c => cast5/cast5.c | 0 cast5.h => cast5/cast5.h | 0 cast6.c => cast6/cast6.c | 0 cast6.h => cast6/cast6.h | 0 cast6_sboxes.h => cast6/cast6_sboxes.h | 0 des.c => des/des.c | 0 des.h => des/des.h | 0 entropium.c => entropium/entropium.c | 0 entropium.h => entropium/entropium.h | 0 sha256-asm.S => entropium/sha256-asm.S | 0 sha256.h => entropium/sha256.h | 0 grain.c => grain/grain.c | 0 grain.h => grain/grain.h | 0 hmac-md5/base64_dec.c | 246 ++++ hmac-md5/base64_dec.h | 29 + hmac-md5/base64_enc.c | 117 ++ hmac-md5/base64_enc.h | 28 + hmac-md5.c => hmac-md5/hmac-md5.c | 0 hmac-md5.h => hmac-md5/hmac-md5.h | 0 md5-asm.S => hmac-md5/md5-asm.S | 0 md5.h => hmac-md5/md5.h | 0 hmac-sha1.c => hmac-sha1/hmac-sha1.c | 0 hmac-sha1.h => hmac-sha1/hmac-sha1.h | 0 sha1-asm.S => hmac-sha1/sha1-asm.S | 0 sha1.h => hmac-sha1/sha1.h | 0 hmac-sha256.c => hmac-sha256/hmac-sha256.c | 0 hmac-sha256.h => hmac-sha256/hmac-sha256.h | 0 hmac-sha256/sha256-asm.S | 1042 +++++++++++++++++ hmac-sha256/sha256.h | 122 ++ md5/md5-asm.S | 977 ++++++++++++++++ md5.c => md5/md5.c | 0 md5/md5.h | 55 + md5_sbox.h => md5/md5_sbox.h | 0 mickey128.c => mickey128/mickey128.c | 0 mickey128.h => mickey128/mickey128.h | 0 mkfiles/arcfour.mk | 1 + mkfiles/arcfour_c.mk | 5 +- mkfiles/base64.mk | 2 +- mkfiles/bmw_c.mk | 2 +- mkfiles/cast5.mk | 1 + mkfiles/cast6.mk | 2 +- mkfiles/des.mk | 1 + 
mkfiles/entropium.mk | 1 + mkfiles/grain.mk | 1 + mkfiles/hmac-md5.mk | 3 +- mkfiles/hmac-sha1.mk | 1 + mkfiles/hmac-sha256.mk | 1 + mkfiles/md5.mk | 1 + mkfiles/md5_c.mk | 1 + mkfiles/mickey128.mk | 1 + mkfiles/present.mk | 2 +- mkfiles/rc5.mk | 2 +- mkfiles/rc6.mk | 2 +- mkfiles/seed.mk | 1 + mkfiles/seed_C.mk | 1 + mkfiles/serpent-bitslice.mk | 2 +- mkfiles/serpent_asm_bitslice.mk | 2 +- mkfiles/serpent_asm_fast.mk | 2 +- mkfiles/serpent_asm_small.mk | 2 +- mkfiles/serpent_c.mk | 2 +- mkfiles/sha1.mk | 1 + mkfiles/sha1_c.mk | 1 + mkfiles/sha256.mk | 1 + mkfiles/sha256_c.mk | 1 + mkfiles/shabea.mk | 1 + mkfiles/shacal1enc.mk | 6 +- mkfiles/shacal2enc.mk | 6 +- mkfiles/skipjack.mk | 1 + mkfiles/tdes.mk | 5 +- mkfiles/trivium.mk | 1 + mkfiles/xtea.mk | 1 + mkfiles/xtea_c.mk | 1 + present.c => present/present.c | 0 present.h => present/present.h | 0 rc5.c => rc5/rc5.c | 0 rc5.h => rc5/rc5.h | 0 rc6.c => rc6/rc6.c | 0 rc6.h => rc6/rc6.h | 0 seed-asm.S => seed/seed-asm.S | 0 seed.h => seed/seed.h | 0 seed_C.c => seed/seed_C.c | 0 seed_sbox.h => seed/seed_sbox.h | 0 serpent/memxor.S | 66 ++ serpent/memxor.h | 7 + serpent-asm.S => serpent/serpent-asm.S | 0 .../serpent-sboxes-bitslice-asm.S | 0 .../serpent-sboxes-bitslice.c | 0 .../serpent-sboxes-fast.S | 0 .../serpent-sboxes-small.S | 0 serpent-sboxes.h => serpent/serpent-sboxes.h | 0 .../serpent-sboxes_c.c | 0 serpent.c => serpent/serpent.c | 0 serpent.h => serpent/serpent.h | 0 sha1/sha1-asm.S | 886 ++++++++++++++ sha1.c => sha1/sha1.c | 0 sha1/sha1.h | 117 ++ sha256/sha256-asm.S | 1042 +++++++++++++++++ sha256.c => sha256/sha256.c | 0 sha256/sha256.h | 122 ++ shabea/memxor.S | 66 ++ shabea/memxor.h | 7 + shabea/sha256-asm.S | 1042 +++++++++++++++++ shabea/sha256.h | 122 ++ shabea.c => shabea/shabea.c | 0 shabea.h => shabea/shabea.h | 0 shacal1/sha1-asm.S | 886 ++++++++++++++ shacal1/sha1.h | 117 ++ shacal1_enc.c => shacal1/shacal1_enc.c | 0 shacal1_enc.h => shacal1/shacal1_enc.h | 0 shacal2/sha256-asm.S | 1042 
+++++++++++++++++ shacal2/sha256.h | 122 ++ shacal2_enc.c => shacal2/shacal2_enc.c | 0 shacal2_enc.h => shacal2/shacal2_enc.h | 0 skipjack.c => skipjack/skipjack.c | 0 skipjack.h => skipjack/skipjack.h | 0 test_src/main-hmac-md5-test.c | 12 +- trivium.c => trivium/trivium.c | 0 trivium.h => trivium/trivium.h | 0 xtea-asm.S => xtea/xtea-asm.S | 0 xtea.c => xtea/xtea.c | 0 xtea.h => xtea/xtea.h | 0 133 files changed, 8312 insertions(+), 28 deletions(-) rename arcfour-asm.S => arcfour/arcfour-asm.S (100%) rename arcfour.c => arcfour/arcfour.c (100%) rename arcfour.h => arcfour/arcfour.h (100%) rename base64_dec.c => base64/base64_dec.c (100%) rename base64_dec.h => base64/base64_dec.h (100%) rename base64_enc.c => base64/base64_enc.c (100%) rename base64_enc.h => base64/base64_enc.h (100%) rename bmw_large.c => bmw/bmw_large.c (100%) rename bmw_large.h => bmw/bmw_large.h (100%) rename bmw_small.c => bmw/bmw_small.c (100%) rename bmw_small.h => bmw/bmw_small.h (100%) rename cast5-sbox.h => cast5/cast5-sbox.h (100%) rename cast5.c => cast5/cast5.c (100%) rename cast5.h => cast5/cast5.h (100%) rename cast6.c => cast6/cast6.c (100%) rename cast6.h => cast6/cast6.h (100%) rename cast6_sboxes.h => cast6/cast6_sboxes.h (100%) rename des.c => des/des.c (100%) rename des.h => des/des.h (100%) rename entropium.c => entropium/entropium.c (100%) rename entropium.h => entropium/entropium.h (100%) rename sha256-asm.S => entropium/sha256-asm.S (100%) rename sha256.h => entropium/sha256.h (100%) rename grain.c => grain/grain.c (100%) rename grain.h => grain/grain.h (100%) create mode 100644 hmac-md5/base64_dec.c create mode 100644 hmac-md5/base64_dec.h create mode 100644 hmac-md5/base64_enc.c create mode 100644 hmac-md5/base64_enc.h rename hmac-md5.c => hmac-md5/hmac-md5.c (100%) rename hmac-md5.h => hmac-md5/hmac-md5.h (100%) rename md5-asm.S => hmac-md5/md5-asm.S (100%) rename md5.h => hmac-md5/md5.h (100%) rename hmac-sha1.c => hmac-sha1/hmac-sha1.c (100%) rename hmac-sha1.h => 
hmac-sha1/hmac-sha1.h (100%) rename sha1-asm.S => hmac-sha1/sha1-asm.S (100%) rename sha1.h => hmac-sha1/sha1.h (100%) rename hmac-sha256.c => hmac-sha256/hmac-sha256.c (100%) rename hmac-sha256.h => hmac-sha256/hmac-sha256.h (100%) create mode 100644 hmac-sha256/sha256-asm.S create mode 100644 hmac-sha256/sha256.h create mode 100644 md5/md5-asm.S rename md5.c => md5/md5.c (100%) create mode 100644 md5/md5.h rename md5_sbox.h => md5/md5_sbox.h (100%) rename mickey128.c => mickey128/mickey128.c (100%) rename mickey128.h => mickey128/mickey128.h (100%) rename present.c => present/present.c (100%) rename present.h => present/present.h (100%) rename rc5.c => rc5/rc5.c (100%) rename rc5.h => rc5/rc5.h (100%) rename rc6.c => rc6/rc6.c (100%) rename rc6.h => rc6/rc6.h (100%) rename seed-asm.S => seed/seed-asm.S (100%) rename seed.h => seed/seed.h (100%) rename seed_C.c => seed/seed_C.c (100%) rename seed_sbox.h => seed/seed_sbox.h (100%) create mode 100644 serpent/memxor.S create mode 100644 serpent/memxor.h rename serpent-asm.S => serpent/serpent-asm.S (100%) rename serpent-sboxes-bitslice-asm.S => serpent/serpent-sboxes-bitslice-asm.S (100%) rename serpent-sboxes-bitslice.c => serpent/serpent-sboxes-bitslice.c (100%) rename serpent-sboxes-fast.S => serpent/serpent-sboxes-fast.S (100%) rename serpent-sboxes-small.S => serpent/serpent-sboxes-small.S (100%) rename serpent-sboxes.h => serpent/serpent-sboxes.h (100%) rename serpent-sboxes_c.c => serpent/serpent-sboxes_c.c (100%) rename serpent.c => serpent/serpent.c (100%) rename serpent.h => serpent/serpent.h (100%) create mode 100644 sha1/sha1-asm.S rename sha1.c => sha1/sha1.c (100%) create mode 100644 sha1/sha1.h create mode 100644 sha256/sha256-asm.S rename sha256.c => sha256/sha256.c (100%) create mode 100644 sha256/sha256.h create mode 100644 shabea/memxor.S create mode 100644 shabea/memxor.h create mode 100644 shabea/sha256-asm.S create mode 100644 shabea/sha256.h rename shabea.c => shabea/shabea.c (100%) rename 
shabea.h => shabea/shabea.h (100%) create mode 100644 shacal1/sha1-asm.S create mode 100644 shacal1/sha1.h rename shacal1_enc.c => shacal1/shacal1_enc.c (100%) rename shacal1_enc.h => shacal1/shacal1_enc.h (100%) create mode 100644 shacal2/sha256-asm.S create mode 100644 shacal2/sha256.h rename shacal2_enc.c => shacal2/shacal2_enc.c (100%) rename shacal2_enc.h => shacal2/shacal2_enc.h (100%) rename skipjack.c => skipjack/skipjack.c (100%) rename skipjack.h => skipjack/skipjack.h (100%) rename trivium.c => trivium/trivium.c (100%) rename trivium.h => trivium/trivium.h (100%) rename xtea-asm.S => xtea/xtea-asm.S (100%) rename xtea.c => xtea/xtea.c (100%) rename xtea.h => xtea/xtea.h (100%) diff --git a/arcfour-asm.S b/arcfour/arcfour-asm.S similarity index 100% rename from arcfour-asm.S rename to arcfour/arcfour-asm.S diff --git a/arcfour.c b/arcfour/arcfour.c similarity index 100% rename from arcfour.c rename to arcfour/arcfour.c diff --git a/arcfour.h b/arcfour/arcfour.h similarity index 100% rename from arcfour.h rename to arcfour/arcfour.h diff --git a/base64_dec.c b/base64/base64_dec.c similarity index 100% rename from base64_dec.c rename to base64/base64_dec.c diff --git a/base64_dec.h b/base64/base64_dec.h similarity index 100% rename from base64_dec.h rename to base64/base64_dec.h diff --git a/base64_enc.c b/base64/base64_enc.c similarity index 100% rename from base64_enc.c rename to base64/base64_enc.c diff --git a/base64_enc.h b/base64/base64_enc.h similarity index 100% rename from base64_enc.h rename to base64/base64_enc.h diff --git a/bmw_large.c b/bmw/bmw_large.c similarity index 100% rename from bmw_large.c rename to bmw/bmw_large.c diff --git a/bmw_large.h b/bmw/bmw_large.h similarity index 100% rename from bmw_large.h rename to bmw/bmw_large.h diff --git a/bmw_small.c b/bmw/bmw_small.c similarity index 100% rename from bmw_small.c rename to bmw/bmw_small.c diff --git a/bmw_small.h b/bmw/bmw_small.h similarity index 100% rename from bmw_small.h rename 
to bmw/bmw_small.h diff --git a/cast5-sbox.h b/cast5/cast5-sbox.h similarity index 100% rename from cast5-sbox.h rename to cast5/cast5-sbox.h diff --git a/cast5.c b/cast5/cast5.c similarity index 100% rename from cast5.c rename to cast5/cast5.c diff --git a/cast5.h b/cast5/cast5.h similarity index 100% rename from cast5.h rename to cast5/cast5.h diff --git a/cast6.c b/cast6/cast6.c similarity index 100% rename from cast6.c rename to cast6/cast6.c diff --git a/cast6.h b/cast6/cast6.h similarity index 100% rename from cast6.h rename to cast6/cast6.h diff --git a/cast6_sboxes.h b/cast6/cast6_sboxes.h similarity index 100% rename from cast6_sboxes.h rename to cast6/cast6_sboxes.h diff --git a/des.c b/des/des.c similarity index 100% rename from des.c rename to des/des.c diff --git a/des.h b/des/des.h similarity index 100% rename from des.h rename to des/des.h diff --git a/entropium.c b/entropium/entropium.c similarity index 100% rename from entropium.c rename to entropium/entropium.c diff --git a/entropium.h b/entropium/entropium.h similarity index 100% rename from entropium.h rename to entropium/entropium.h diff --git a/sha256-asm.S b/entropium/sha256-asm.S similarity index 100% rename from sha256-asm.S rename to entropium/sha256-asm.S diff --git a/sha256.h b/entropium/sha256.h similarity index 100% rename from sha256.h rename to entropium/sha256.h diff --git a/grain.c b/grain/grain.c similarity index 100% rename from grain.c rename to grain/grain.c diff --git a/grain.h b/grain/grain.h similarity index 100% rename from grain.h rename to grain/grain.h diff --git a/hmac-md5/base64_dec.c b/hmac-md5/base64_dec.c new file mode 100644 index 0000000..f057f54 --- /dev/null +++ b/hmac-md5/base64_dec.c @@ -0,0 +1,246 @@ +/* base64_dec.c */ +/* + * This file is part of the AVR-Crypto-Lib. 
+ * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +/** + * base64 decoder (RFC3548) + * Author: Daniel Otte + * License: GPLv3 + * + * + */ + +#include +#include "base64_dec.h" + +#include "test_src/cli.h" + +/* + #define USE_GCC_EXTENSION +*/ +#if 1 + +#ifdef USE_GCC_EXTENSION + +static +int ascii2bit6(char a){ + switch(a){ + case 'A'...'Z': + return a-'A'; + case 'a'...'z': + return a-'a'+26; + case '0'...'9': + return a-'0'+52; + case '+': + case '-': + return 62; + case '/': + case '_': + return 63; + default: + return -1; + } +} + +#else + +static +uint8_t ascii2bit6(char a){ + int r; + switch(a>>4){ + case 0x5: + case 0x4: + r=a-'A'; + if(r<0 || r>25){ + return -1; + } else { + return r; + } + case 0x7: + case 0x6: + r=a-'a'; + if(r<0 || r>25){ + return -1; + } else { + return r+26; + } + break; + case 0x3: + if(a>'9') + return -1; + return a-'0'+52; + default: + break; + } + switch (a){ + case '+': + case '-': + return 62; + case '/': + case '_': + return 63; + default: + return 0xff; + } +} + +#endif + +#else + +static +uint8_t ascii2bit6(uint8_t a){ + if(a>='A' && a<='Z'){ + return a-'A'; + } else { + if(a>='a' && a<= 'z'){ + return a-'a'+26; + } else { + if(a>='0' && a<='9'){ + return a-'0'+52; + } else { + if(a=='+' || a=='-'){ + return 62; + } else { + if(a=='/' || a=='_'){ + return 63; + } else { + return 
0xff; + } + } + } + } + } +} + +#endif + +int base64_binlength(char* str, uint8_t strict){ + int l=0; + uint8_t term=0; + for(;;){ + if(*str=='\0') + break; + if(*str=='\n' || *str=='\r'){ + str++; + continue; + } + if(*str=='='){ + term++; + str++; + if(term==2){ + break; + } + continue; + } + if(term) + return -1; + if(ascii2bit6(*str)==-1){ + if(strict) + return -1; + } else { + l++; + } + str++; + } + switch(term){ + case 0: + if(l%4!=0) + return -1; + return l/4*3; + case 1: + if(l%4!=3) + return -1; + return (l+1)/4*3-1; + case 2: + if(l%4!=2) + return -1; + return (l+2)/4*3-2; + default: + return -1; + } +} + +/* + |543210543210543210543210| + |765432107654321076543210| + + . . . . + |54321054|32105432|10543210| + |76543210|76543210|76543210| + +*/ + +int base64dec(void* dest, char* b64str, uint8_t strict){ + uint8_t buffer[4]; + uint8_t idx=0; + uint8_t term=0; + for(;;){ +// cli_putstr_P(PSTR("\r\n DBG: got 0x")); +// cli_hexdump(b64str, 1); + buffer[idx]= ascii2bit6(*b64str); +// cli_putstr_P(PSTR(" --> 0x")); +// cli_hexdump(buffer+idx, 1); + + if(buffer[idx]==0xFF){ + if(*b64str=='='){ + term++; + b64str++; + if(term==2) + goto finalize; /* definitly the end */ + }else{ + if(*b64str == '\0'){ + goto finalize; /* definitly the end */ + }else{ + if(*b64str == '\r' || *b64str == '\n' || !(strict)){ + b64str++; /* charcters that we simply ignore */ + }else{ + return -1; + } + } + } + }else{ + if(term) + return -1; /* this happens if we get a '=' in the stream */ + idx++; + b64str++; + } + if(idx==4){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4; + ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2; + ((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3]; + dest = (uint8_t*)dest +3; + idx=0; + } + } + finalize: + /* the final touch */ + if(idx==0) + return 0; + if(term==1){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4; + ((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2; + return 0; + } + if(term==2){ + ((uint8_t*)dest)[0] = buffer[0]<<2 | 
buffer[1]>>4; + return 0; + } + return -1; +} diff --git a/hmac-md5/base64_dec.h b/hmac-md5/base64_dec.h new file mode 100644 index 0000000..39beff8 --- /dev/null +++ b/hmac-md5/base64_dec.h @@ -0,0 +1,29 @@ +/* base64_dec.h */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef BASE64_DEC_H_ +#define BASE64_DEC_H_ + +#include + +int base64_binlength(char* str, uint8_t strict); +int base64dec(void* dest, char* b64str, uint8_t strict); + +#endif /*BASE64_DEC_H_*/ diff --git a/hmac-md5/base64_enc.c b/hmac-md5/base64_enc.c new file mode 100644 index 0000000..400f25c --- /dev/null +++ b/hmac-md5/base64_enc.c @@ -0,0 +1,117 @@ +/* base64_enc.c */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +/** + * base64 encoder (RFC3548) + * Author: Daniel Otte + * License: GPLv3 + * + * + */ + +#include +#include "base64_enc.h" + +#if 1 +#include + +char base64_alphabet[64] PROGMEM = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/' }; + +static +char bit6toAscii(uint8_t a){ + a &= (uint8_t)0x3F; + return pgm_read_byte(base64_alphabet+a); +} + +#else + +static +char bit6toAscii(uint8_t a){ + a &= (uint8_t)0x3F; + + if(a<=25){ + return a+'A'; + } else { + if(a<=51){ + return a-26+'a'; + } else { + if(a<=61){ + return a-52+'0'; + } else { + if(a==62){ + return '+'; + } else { + return '/'; /* a == 63 */ + } + } + } + } +} + +#endif + +void base64enc(char* dest, void* src, uint16_t length){ + uint16_t i,j; + uint8_t a[4]; + for(i=0; i>2; + a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F; + a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F; + a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F; + for(j=0; j<4; ++j){ + *dest++=bit6toAscii(a[j]); + } + } + /* now we do the rest */ + switch(length%3){ + case 0: + break; + case 1: + a[0]=(((uint8_t*)src)[i*3+0])>>2; + a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F; + *dest++ = bit6toAscii(a[0]); + *dest++ = bit6toAscii(a[1]); + *dest++ = '='; + *dest++ = '='; + break; + case 2: + a[0]= (((uint8_t*)src)[i*3+0])>>2; + a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F; + a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F; + *dest++ = bit6toAscii(a[0]); + *dest++ = bit6toAscii(a[1]); + *dest++ = bit6toAscii(a[2]); + *dest++ = '='; + break; + default: /* this will not 
happen! */ + break; + } +/* finalize: */ + *dest='\0'; +} + diff --git a/hmac-md5/base64_enc.h b/hmac-md5/base64_enc.h new file mode 100644 index 0000000..9065132 --- /dev/null +++ b/hmac-md5/base64_enc.h @@ -0,0 +1,28 @@ +/* base64_enc.h */ +/* + * This file is part of the AVR-Crypto-Lib. + * Copyright (C) 2006, 2007, 2008 Daniel Otte (daniel.otte@rub.de) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef BASE64_ENC_H_ +#define BASE64_ENC_H_ + +#include + +void base64enc(char* dest, void* src, uint16_t length); + +#endif /*BASE64_ENC_H_*/ diff --git a/hmac-md5.c b/hmac-md5/hmac-md5.c similarity index 100% rename from hmac-md5.c rename to hmac-md5/hmac-md5.c diff --git a/hmac-md5.h b/hmac-md5/hmac-md5.h similarity index 100% rename from hmac-md5.h rename to hmac-md5/hmac-md5.h diff --git a/md5-asm.S b/hmac-md5/md5-asm.S similarity index 100% rename from md5-asm.S rename to hmac-md5/md5-asm.S diff --git a/md5.h b/hmac-md5/md5.h similarity index 100% rename from md5.h rename to hmac-md5/md5.h diff --git a/hmac-sha1.c b/hmac-sha1/hmac-sha1.c similarity index 100% rename from hmac-sha1.c rename to hmac-sha1/hmac-sha1.c diff --git a/hmac-sha1.h b/hmac-sha1/hmac-sha1.h similarity index 100% rename from hmac-sha1.h rename to hmac-sha1/hmac-sha1.h diff --git a/sha1-asm.S b/hmac-sha1/sha1-asm.S similarity index 100% rename from sha1-asm.S rename to hmac-sha1/sha1-asm.S diff --git a/sha1.h 
b/hmac-sha1/sha1.h similarity index 100% rename from sha1.h rename to hmac-sha1/sha1.h diff --git a/hmac-sha256.c b/hmac-sha256/hmac-sha256.c similarity index 100% rename from hmac-sha256.c rename to hmac-sha256/hmac-sha256.c diff --git a/hmac-sha256.h b/hmac-sha256/hmac-sha256.h similarity index 100% rename from hmac-sha256.h rename to hmac-sha256/hmac-sha256.h diff --git a/hmac-sha256/sha256-asm.S b/hmac-sha256/sha256-asm.S new file mode 100644 index 0000000..d9eb6b6 --- /dev/null +++ b/hmac-sha256/sha256-asm.S @@ -0,0 +1,1042 @@ +/* sha256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; sha-256 implementation in assembler +SHA256_BLOCK_BITS = 512 +SHA256_HASH_BITS = 256 + +.macro precall + /* push r18 - r27, r30 - r31*/ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +/* X points to Block */ +.macro dbg_hexdump length + precall + hexdump \length + postcall +.endm + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha256_ctx_t is: +; +; [h0][h1][h2][h3][h4][h5][h6][h7][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha256_ctx2hash +; === sha256_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha256_ctx structure +; given in r23,r22 +sha256_ctx2hash: + movw r26, r22 + movw r30, r24 + ldi r21, 8 + sbiw r26, 4 +1: + ldi r20, 4 + adiw r26, 8 +2: + ld r0, -X + st Z+, r0 + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha256 +; === sha256 === +; this function calculates SHA-256 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in 
r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha256: +sha256_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r16, SPL + in r17, SPH + subi r16, 8*4+8 + sbci r17, 0 + in r0, SREG + cli + out SPL, r16 + out SPH, r17 + out SREG, r0 + + push r25 + push r24 + inc r16 + adc r17, r1 + + movw r8, r18 /* backup of length*/ + movw r10, r20 + + movw r12, r22 /* backup pf msg-ptr */ + + movw r24, r16 + rcall sha256_init + /* if length >= 512 */ +1: + tst r11 + brne 4f + tst r10 + brne 4f + mov r19, r9 + cpi r19, 0x02 + brlo 4f + + movw r24, r16 + movw r22, r12 + rcall sha256_nextBlock + ldi r19, 0x64 + add r22, r19 + adc r23, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 + rcall sha256_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha256_ctx2hash + +sha256_epilog: + in r30, SPL + in r31, SPH + adiw r30, 8*4+8 + in r0, SREG + cli + out SPL, r30 + out SPH, r31 + out SREG, r0 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha256_lastBlock +; === sha256_lastBlock === +; this function does padding & Co. 
for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +; param3: an 16-bit integer specifing length of block in bits +; given in r21,r20 +sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1) + + +sha256_lastBlock: + cpi r21, 0x02 + brlo sha256_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 0x02 + subi r23, -2 + rjmp sha256_lastBlock +sha256_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r1, SREG + subi r30, lo8(64) + sbci r31, hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* may be we should explain this ... */ + bld r18, 5 /* now: r18 == length/8 (aka. 
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha256_lastBlock_post_copy + mov r1, r18 +sha256_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha256_lastBlock_copy_loop +sha256_lastBlock_post_copy: +sha256_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha256_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*8+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha256_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha256_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha256_lastBlock_epilog +sha256_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 8*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha256_nextBlock + +sha256_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r1, SREG + adiw 
r30, 63 ; lo8(64) + adiw r30, 1 ; hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + clr r1 + clr r0 + ret + +/**/ +;########################################################### + +.global sha256_nextBlock +; === sha256_nextBlock === +; this is the core function for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte) + +Bck1 = 12 +Bck2 = 13 +Bck3 = 14 +Bck4 = 15 +Func1 = 22 +Func2 = 23 +Func3 = 24 +Func4 = 25 +Accu1 = 16 +Accu2 = 17 +Accu3 = 18 +Accu4 = 19 +XAccu1 = 8 +XAccu2 = 9 +XAccu3 = 10 +XAccu4 = 11 +T1 = 4 +T2 = 5 +T3 = 6 +T4 = 7 +LoopC = 1 +/* byteorder: high number <--> high significance */ +sha256_nextBlock: + ; initial, let's make some space ready for local vars + push r4 /* replace push & pop by mem ops? 
*/ + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack + movw r30, r22 ; Z points to message + subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha256_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + push r18 + push r19 + push r24 + push r25 /* param1 will be needed later */ + ; now we fill the w array with message (think about endianess) + adiw r26, 1 ; X++ + ldi r20, 16 +sha256_nextBlock_wcpyloop: + ld r23, Z+ + ld r22, Z+ + ld r19, Z+ + ld r18, Z+ + st X+, r18 + st X+, r19 + st X+, r22 + st X+, r23 + dec r20 + brne sha256_nextBlock_wcpyloop +/* for (i=16; i<64; ++i){ + w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; + } */ + /* r25,r24,r23,r24 (r21,r20) are function values + r19,r18,r17,r16 are the accumulator + r15,r14,r13,rBck1 are backup1 + r11,r10,r9 ,r8 are xor accu + r1 is round counter */ + + ldi r20, 64-16 + mov LoopC, r20 +sha256_nextBlock_wcalcloop: + movw r30, r26 ; cp X to Z + sbiw r30, 63 + sbiw r30, 1 ; substract 64 = 16*4 + ld Accu1, Z+ + ld Accu2, Z+ + ld Accu3, Z+ + ld Accu4, Z+ /* w[i] = w[i-16] */ + ld Bck1, Z+ + ld Bck2, Z+ + ld Bck3, Z+ + ld Bck4, Z+ /* backup = w[i-15] */ + /* now sigma 0 */ + mov Func1, Bck2 + mov Func2, Bck3 + mov Func3, Bck4 + mov Func4, Bck1 /* prerotated by 8 */ + ldi r20, 1 + rcall bitrotl + movw XAccu1, Func1 + movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/ 
+sigma0_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + ror Bck1 + dec Func2 + brne sigma0_shr + eor XAccu1, Bck1 + eor XAccu2, Bck2 + eor XAccu3, Bck3 + eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + ldd Func1, Z+7*4 /* now accu += w[i-7] */ + ldd Func2, Z+7*4+1 + ldd Func3, Z+7*4+2 + ldd Func4, Z+7*4+3 + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + ldd Bck1, Z+12*4 /* now backup = w[i-2]*/ + ldd Bck2, Z+12*4+1 + ldd Bck3, Z+12*4+2 + ldd Bck4, Z+12*4+3 + /* now sigma 1 */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 1 + rcall bitrotr + movw XAccu3, Func3 + movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */ +; movw Func1, Bck3 +; movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/ +sigma1_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + dec Func2 + brne sigma1_shr + eor XAccu1, Bck2 + eor XAccu2, Bck3 + eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + /* now let's store the shit */ + st X+, Accu1 + st X+, Accu2 + st X+, Accu3 + st X+, Accu4 + dec LoopC + breq 3f ; skip if zero + rjmp sha256_nextBlock_wcalcloop +3: + /* we are finished with w array X points one byte post w */ +/* init a array */ + pop r31 + pop r30 + push r30 + push r31 + ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */ +init_a_array: + ld r1, Z+ + st X+, r1 + dec r25 + brne init_a_array + +/* now the real fun begins */ +/* for (i=0; i<64; ++i){ + t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; + t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); + 
memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; + a[4] += t1; + a[0] = t1 + t2; + } */ + /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */ + sbiw r26, 8*4 /* X still points at a[7]+1*/ + movw r28, r26 + ldi r30, lo8(sha256_kv) + ldi r31, hi8(sha256_kv) + dec r27 /* X - (64*4 == 256) */ + ldi r25, 64 + mov LoopC, r25 +sha256_main_loop: + /* now calculate t1 */ + /*CH(x,y,z) = (x&y)^((~x)&z)*/ + ldd T1, Y+5*4 + ldd T2, Y+5*4+1 + ldd T3, Y+5*4+2 + ldd T4, Y+5*4+3 /* y in T */ + ldd Func1, Y+4*4 + ldd Func2, Y+4*4+1 + ldd Func3, Y+4*4+2 + ldd Func4, Y+4*4+3 /* x in Func */ + ldd Bck1, Y+6*4 + ldd Bck2, Y+6*4+1 + ldd Bck3, Y+6*4+2 + ldd Bck4, Y+6*4+3 /* z in Bck */ + and T1, Func1 + and T2, Func2 + and T3, Func3 + and T4, Func4 + com Func1 + com Func2 + com Func3 + com Func4 + and Bck1, Func1 + and Bck2, Func2 + and Bck3, Func3 + and Bck4, Func4 + eor T1, Bck1 + eor T2, Bck2 + eor T3, Bck3 + eor T4, Bck4 /* done, CH(x,y,z) is in T */ + /* now SIGMA1(a[4]) */ + ldd Bck4, Y+4*4 /* think about using it from Func reg above*/ + ldd Bck1, Y+4*4+1 + ldd Bck2, Y+4*4+2 + ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */ + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotl /* rotr(x,6) */ + movw XAccu1, Func1 + movw XAccu3, Func3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 3 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + movw Func1, Bck3 /* this prerotates furteh 16 bits*/ + movw Func3, Bck1 /* so we have now prerotated by 24 bits*/ + ldi r20, 1 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* finished with SIGMA1, add it to T */ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 + /* now we've to add a[7], w[i] and k[i] */ + ldd XAccu1, Y+4*7 + ldd XAccu2, Y+4*7+1 + ldd XAccu3, Y+4*7+2 + ldd XAccu4, Y+4*7+3 + add T1, 
XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add a[7] */ + ld XAccu1, X+ + ld XAccu2, X+ + ld XAccu3, X+ + ld XAccu4, X+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add w[i] */ + lpm XAccu1, Z+ + lpm XAccu2, Z+ + lpm XAccu3, Z+ + lpm XAccu4, Z+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add k[i] */ /* finished with t1 */ + /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/ + /* starting with MAJ(x,y,z) */ + ldd Func1, Y+4*0+0 + ldd Func2, Y+4*0+1 + ldd Func3, Y+4*0+2 + ldd Func4, Y+4*0+3 /* load x=a[0] */ + ldd XAccu1, Y+4*1+0 + ldd XAccu2, Y+4*1+1 + ldd XAccu3, Y+4*1+2 + ldd XAccu4, Y+4*1+3 /* load y=a[1] */ + and XAccu1, Func1 + and XAccu2, Func2 + and XAccu3, Func3 + and XAccu4, Func4 /* XAccu == (x & y) */ + ldd Bck1, Y+4*2+0 + ldd Bck2, Y+4*2+1 + ldd Bck3, Y+4*2+2 + ldd Bck4, Y+4*2+3 /* load z=a[2] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */ + ldd Func1, Y+4*1+0 + ldd Func2, Y+4*1+1 + ldd Func3, Y+4*1+2 + ldd Func4, Y+4*1+3 /* load y=a[1] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */ + /* SIGMA0(a[0]) */ + ldd Bck1, Y+4*0+0 /* we should combine this with above */ + ldd Bck2, Y+4*0+1 + ldd Bck3, Y+4*0+2 + ldd Bck4, Y+4*0+3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotr + movw Accu1, Func1 + movw Accu3, Func3 /* Accu = shr(a[0], 2) */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotate by 16 bits */ + ldi r20, 3 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */ + mov Func1, Bck4 + mov Func2, Bck1 + mov Func3, Bck2 + mov Func4, Bck3 /* prerotate by 24 
bits */ + ldi r20, 2 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */ + add Accu1, XAccu1 /* add previous result (MAJ)*/ + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 + /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/ + /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + + ldi r21, 7*4 + adiw r28, 7*4 +a_shift_loop: + ld r25, -Y /* warning: this is PREdecrement */ + std Y+4, r25 + dec r21 + brne a_shift_loop + + ldd Bck1, Y+4*4+0 + ldd Bck2, Y+4*4+1 + ldd Bck3, Y+4*4+2 + ldd Bck4, Y+4*4+3 + add Bck1, T1 + adc Bck2, T2 + adc Bck3, T3 + adc Bck4, T4 + std Y+4*4+0, Bck1 + std Y+4*4+1, Bck2 + std Y+4*4+2, Bck3 + std Y+4*4+3, Bck4 + add Accu1, T1 + adc Accu2, T2 + adc Accu3, T3 + adc Accu4, T4 + std Y+4*0+0, Accu1 + std Y+4*0+1, Accu2 + std Y+4*0+2, Accu3 + std Y+4*0+3, Accu4 /* a array updated */ + + + dec LoopC + breq update_state + rjmp sha256_main_loop ;brne sha256_main_loop +update_state: + /* update state */ + /* pointers to state should still exist on the stack ;-) */ + pop r31 + pop r30 + ldi r21, 8 +update_state_loop: + ldd Accu1, Z+0 + ldd Accu2, Z+1 + ldd Accu3, Z+2 + ldd Accu4, Z+3 + ld Func1, Y+ + ld Func2, Y+ + ld Func3, Y+ + ld Func4, Y+ + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + st Z+, Accu1 + st Z+, Accu2 + st Z+, Accu3 + st Z+, Accu4 + dec r21 + brne update_state_loop + /* now we just have to update the length */ + adiw r30, 1 /* since we add 512, we can simply skip the LSB */ + ldi r21, 2 + ldi r22, 6 + ld r20, Z + add r20, r21 + st Z+, r20 + clr r21 +sha256_nextBlock_fix_length: + brcc sha256_nextBlock_epilog + ld r20, Z + adc r20, r21 + st Z+, r20 + dec r22 + brne sha256_nextBlock_fix_length + +; EPILOG +sha256_nextBlock_epilog: +/* now we should clean up the stack */ + + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out 
SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + ret + +sha256_kv: ; round-key-vector stored in ProgMem, each 32-bit constant as two 16-bit words (low word first) +.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c +.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b +.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9 +.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429 +.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272 +.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a +.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e +.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671 + + +;########################################################### + +.global sha256_init +;uint32_t sha256_init_vector[]={ +; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, +; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +; +;void sha256_init(sha256_ctx_t *state){ +; state->length=0; +; memcpy(state->h, sha256_init_vector, 8*4); +;} +; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram +; modifies: X(r26,r27), Z(r30,r31), r22, r23 +sha256_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha256_init_vector)) + ldi r31, hi8((sha256_init_vector)) + ldi r22, 32+8 /* bytes to copy: 32 for h[] plus 8 zero bytes for length */ +sha256_init_vloop: + lpm r23, Z+ /* next byte of sha256_init_vector from program memory */ + st X+, r23 + dec r22 + brne
sha256_init_vloop + ret + +sha256_init_vector: /* eight initial hash words (low 16-bit word first), then eight zero bytes */ +.word 0xE667, 0x6A09 +.word 0xAE85, 0xBB67 +.word 0xF372, 0x3C6E +.word 0xF53A, 0xA54F +.word 0x527F, 0x510E +.word 0x688C, 0x9B05 +.word 0xD9AB, 0x1F83 +.word 0xCD19, 0x5BE0 +.word 0x0000, 0x0000 +.word 0x0000, 0x0000 + +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22 +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 /* 8 or more steps left: rotate by a whole byte through r21 */ + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: /* bit-wise part, r20 must not exceed 8 here: bits leaving r25 are collected in r21 */ + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + rjmp bitrotl_loop +fixrotl: + or r22, r21 /* wrap the collected bits around into the low byte */ + ret + + +;########################################################### + +.global rotr32 +; === ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22 +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 /* 8 or more steps left: rotate by a whole byte through r21 */ + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: /* bit-wise part, r20 must not exceed 8 here: bits leaving r22 are collected in r21 */ + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + rjmp bitrotr_loop +fixrotr: + or r25, r21 /* wrap the collected bits around into the high byte */ + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianness of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,r22 (r25 is most significant) +; modifies: r20, r21; the byte-swapped word is left in r25,r24,r23,r22 +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + mov r23, r24 + mov r24, r21 + mov r25, r20 + ret + diff --git
a/hmac-sha256/sha256.h b/hmac-sha256/sha256.h new file mode 100644 index 0000000..24960a3 --- /dev/null +++ b/hmac-sha256/sha256.h @@ -0,0 +1,122 @@ +/* sha256.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file sha256.h + * \author Daniel Otte + * \date 2006-05-16 + * \license GPLv3 or later + * + */ + +#ifndef SHA256_H_ +#define SHA256_H_ + +#define __LITTLE_ENDIAN__ + + +#include <stdint.h> + +/** \def SHA256_HASH_BITS + * defines the size of a SHA-256 hash value in bits + */ + +/** \def SHA256_HASH_BYTES + * defines the size of a SHA-256 hash value in bytes + */ + +/** \def SHA256_BLOCK_BITS + * defines the size of a SHA-256 input block in bits + */ + +/** \def SHA256_BLOCK_BYTES + * defines the size of a SHA-256 input block in bytes + */ + +#define SHA256_HASH_BITS 256 +#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_BLOCK_BITS 512 +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) + +/** \typedef sha256_ctx_t + * \brief SHA-256 context type + * + * A variable of this type may hold the state of a SHA-256 hashing process + */ +typedef struct { + uint32_t h[8]; /**< the eight 32-bit words of the running hash state */ + uint64_t length; /**< length counter, set to 0 by sha256_init() */ +} sha256_ctx_t; + +/** \typedef sha256_hash_t + * \brief SHA-256 hash value type + * + * A variable of this type may hold the hash value produced by the + * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t*
state) function. + */ +typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES]; + +/** \fn void sha256_init(sha256_ctx_t *state) + * \brief initialise a SHA-256 context + * + * This function sets a ::sha256_ctx_t to the initial values for hashing. + * \param state pointer to the SHA-256 hashing context + */ +void sha256_init(sha256_ctx_t *state); + +/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block) + * \brief update the context with a given block + * + * This function updates the SHA-256 hash context by processing the given block + * of fixed length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of fixed length (512 bit = 64 byte) + */ +void sha256_nextBlock (sha256_ctx_t* state, const void* block); + +/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b) + * \brief finalize the context with the given block + * + * This function finalizes the SHA-256 hash context by processing the given block + * of variable length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of variable length (see length_b) + * \param length_b the length of the block in bits + */ +void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b); + +/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) + * \brief convert the hash state into the hash value + * This function reads the context and writes the hash value to the destination + * \param dest pointer to the location where the hash value should be written + * \param state pointer to the SHA-256 hash context + */ +void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state); + +/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b) + * \brief simple SHA-256 hashing function for direct hashing + * + * This function automatically hashes a given message of arbitrary length with + * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to + * \param msg pointer to the message that is going to be hashed + * \param length_b length of the message in bits + */ +void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b); + +#endif /*SHA256_H_*/ diff --git a/md5/md5-asm.S b/md5/md5-asm.S new file mode 100644 index 0000000..de3b170 --- /dev/null +++ b/md5/md5-asm.S @@ -0,0 +1,977 @@ +/* md5-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ +/* + * Author: Daniel Otte + * License: GPLv3 or later + * Date: 2008-11-15 +*/ + + +#include "avr-asm-macros.S" + +;########################################################### +; T table: the 64 additive constants T[i], each 32-bit value stored as two 16-bit words (low word first) + +T_table: +.hword 0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c +.hword 0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44 +.hword 0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679 +.hword 0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6 +.hword 0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1 +.hword 0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef +.hword 0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d +.hword 0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf +.hword 0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4 +.hword 0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a +.hword 0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef +.hword 0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08 +.hword 0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86 + + +#define MD5_init_fast + +.global md5_init +#ifndef MD5_init_fast +;########################################################### +;void md5_init(md5_ctx_t *state) +; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram +; modifies: r0, r24, X(r26,r27), Z(r30,r31) +; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes +md5_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8(md5_init_vector) + ldi r31, hi8(md5_init_vector) + ldi r24, 16+4 /* bytes to copy: 16 for a[0..3] plus 4 zero bytes for the counter */ +md5_init_vloop: + lpm r0, Z+ + st X+, r0 + dec r24 + brne md5_init_vloop + ret + +md5_init_vector: +.hword 0x2301, 0x6745 +.hword 0xAB89, 0xEFCD +.hword 0xDCFE, 0x98BA +.hword 0x5476, 0x1032 +.hword 0x0000, 0x0000 +
+#else +;########################################################### +.global md5_init_fast +;void md5_init(md5_ctx_t *state) +; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram +; modifies: r24, X(r26,r27); expects r1 to hold zero +; cycles = 1+16*3+4*2+4 = 1+48+12 = 61 +; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes +md5_init: +md5_init_fast: + movw r26, r24 + ldi r24, 0x01 /* a[0] = 0x67452301, stored low byte first */ + st X+, r24 + ldi r24, 0x23 + st X+, r24 + ldi r24, 0x45 + st X+, r24 + ldi r24, 0x67 + st X+, r24 + ldi r24, 0x89 /* a[1] = 0xEFCDAB89 */ + st X+, r24 + ldi r24, 0xAB + st X+, r24 + ldi r24, 0xCD + st X+, r24 + ldi r24, 0xEF + st X+, r24 + ldi r24, 0xFE /* a[2] = 0x98BADCFE */ + st X+, r24 + ldi r24, 0xDC + st X+, r24 + ldi r24, 0xBA + st X+, r24 + ldi r24, 0x98 + st X+, r24 + ldi r24, 0x76 /* a[3] = 0x10325476 */ + st X+, r24 + ldi r24, 0x54 + st X+, r24 + ldi r24, 0x32 + st X+, r24 + ldi r24, 0x10 + st X+, r24 + st X+, r1 /* the four counter bytes start at zero */ + st X+, r1 + st X+, r1 + st X+, r1 + ret +#endif +;########################################################### + +/* +static +uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){ + return ((x&y)|((~x)&z)); +} +*/ +; x: r22-r25 (also receives the result) +; y: r18-r21 (overwritten) +; z: r14-r17 +md5_F: + and r18, r22 + and r19, r23 + and r20, r24 + and r21, r25 + com r22 + com r23 + com r24 + com r25 + and r22, r14 + and r23, r15 + and r24, r16 + and r25, r17 + or r22, r18 + or r23, r19 + or r24, r20 + or r25, r21 + rjmp md5_core_F_exit /* reached by ijmp from md5_core_asm, so continue there instead of ret */ + +/* +static +uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){ + return ((x&z)|((~z)&y)); +} +*/ + +; x: r22-r25 (also receives the result) +; y: r18-r21 (overwritten) +; z: r14-r17 (complemented in place) +md5_G: + and r22, r14 + and r23, r15 + and r24, r16 + and r25, r17 + com r14 + com r15 + com r16 + com r17 + and r18, r14 + and r19, r15 + and r20, r16 + and r21, r17 + or r22, r18 + or r23, r19 + or r24, r20 + or r25, r21 + rjmp md5_core_F_exit +/* +static +uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){ + return (x^y^z); +} +*/ +; x: r22-r25 (also receives the result) +; y: r18-r21 +; z: r14-r17 +md5_H: + eor r22, r18 + eor r22, r14 + eor r23, r19 + eor r23, r15 + eor r24, r20 + eor r24, r16 + eor r25, r21 + eor r25, r17 + rjmp md5_core_F_exit
+/* +static +uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){ + return (y ^ (x | (~z))); +} +*/ + +jump_table: /* one rjmp word per round function, indexed by fi */ + rjmp md5_F + rjmp md5_G + rjmp md5_H +; rjmp md5_I (not needed: md5_I follows directly, so table entry 3 is its first instruction) + +; x: r22-r25 (also receives the result) +; y: r18-r21 +; z: r14-r17 (complemented in place) +md5_I: + com r14 + com r15 + com r16 + com r17 + or r22, r14 + or r23, r15 + or r24, r16 + or r25, r17 + eor r22, r18 + eor r23, r19 + eor r24, r20 + eor r25, r21 + rjmp md5_core_F_exit + +as_table: /* byte offsets into a[]; md5_core_asm selects the row with (as-1)&3 */ +; (as+0)&3 (as+3)&3 (as+1)&3 (as+2)&3 +; Z X Y +; AS_SAVE0 AS_SAVE1 AS_SAVE2 AS_SAVE3 +.byte 1*4, 0*4, 2*4, 3*4 ;as=1 +.byte 2*4, 1*4, 3*4, 0*4 ;as=2 +.byte 3*4, 2*4, 0*4, 1*4 ;as=3 +.byte 0*4, 3*4, 1*4, 2*4 ;as=4 + +;########################################################### +.global md5_core +md5_core: /* repacks as, s, i, fi from r20, r18, r16, r14 into r21, r20, r19, r18 and runs on into md5_core_asm */ + mov r21, r20 + mov r20, r18 + mov r19, r16 + mov r18, r14 +; rjmp md5_core_asm (not needed: only comments and symbol definitions lie in between) +/* +void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){ + uint32_t t; + md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I}; + as &= 0x3; + / * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s).
* / + t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ; + a[as]=a[(as+1)&3] + ROTL32(t, s); +} +*/ +; a: r24-r25 +; block: r22-r23 +; as: r21 +; s: r20 +; i: r19 +; fi: r18 +P_A0 = 24 +P_A1 = 25 +P_B0 = 22 +P_B1 = 23 +P_AS = 21 +P_S = 20 +P_I = 19 +P_FI = 18 + +; x: r22-r25 +; y: r18-r21 +; z: r14-r17 + + +AS_SAVE0 = 4 +AS_SAVE1 = 5 +AS_SAVE2 = 6 +AS_SAVE3 = 7 +FI_SAVE = 8 +S_SAVE = 9 +ACCU0 = 10 +ACCU1 = 11 +ACCU2 = 12 +ACCU3 = 13 +ARG_X0 = 22 +ARG_X1 = 23 +ARG_X2 = 24 +ARG_X3 = 25 +ARG_Y0 = 18 +ARG_Y1 = 19 +ARG_Y2 = 20 +ARG_Y3 = 21 +ARG_Z0 = 14 +ARG_Z1 = 15 +ARG_Z2 = 16 +ARG_Z3 = 17 + + +md5_core_asm: /* NOTE(review): r9-r15 and Y(r28,r29) are overwritten below without being saved here; md5_nextBlock saves them itself - confirm for other callers of md5_core */ + push r16 + push r17 + push_range 4, 8 + ldi r30, lo8(T_table) + ldi r31, hi8(T_table) + lsl P_I /* r1:P_I = 4*i, the byte offset into T_table (r1 is expected to be zero on entry) */ + rol r1 + lsl P_I + rol r1 + add r30, P_I + adc r31, r1 + clr r1 /* r1 is used as the zero register again from here on */ + mov FI_SAVE, r18 + /* loading T[i] into ACCU */ + lpm ACCU0, Z+ + lpm ACCU1, Z+ + lpm ACCU2, Z+ + lpm ACCU3, Z + /* add *block to ACCU */ + movw r30, P_B0 + ld r0, Z+ + add ACCU0, r0 + ld r0, Z+ + adc ACCU1, r0 + ld r0, Z+ + adc ACCU2, r0 + ld r0, Z+ + adc ACCU3, r0 + /* add a[as+0&3] to ACCU */ + ldi r30, lo8(as_table) + ldi r31, hi8(as_table) + dec P_AS /* (as-1)&3 selects the as_table row, times 4 bytes per row */ + andi P_AS, 0x03 + lsl P_AS + lsl P_AS + add r30, r21 + adc r31, r1 ; Z points to the correct row in as_table + lpm AS_SAVE0, Z+ + lpm AS_SAVE1, Z+ + lpm AS_SAVE2, Z+ + lpm AS_SAVE3, Z + movw r26, r24 ; X points to a[0] + add r26, AS_SAVE0 + adc r27, r1 ; X points at a[as&3] + ld r0, X+ + add ACCU0, r0 + ld r0, X+ + adc ACCU1, r0 + ld r0, X+ + adc ACCU2, r0 + ld r0, X+ + adc ACCU3, r0 + mov S_SAVE, r20 + + movw r28, r24 /* keep the pointer to a[] in Y: r24/r25 are reused for x below */ + /* loading z value */ + movw r26, r28 + add r26, AS_SAVE1 + adc r27, r1 + ld ARG_Z0, X+ + ld ARG_Z1, X+ + ld ARG_Z2, X+ + ld ARG_Z3, X + + /* loading x value */ + movw r26, r28 + add r26, AS_SAVE2 + adc r27, r1 + ld ARG_X0, X+ + ld ARG_X1, X+ + ld ARG_X2, X+ + ld ARG_X3, X + + /* loading y value */ + movw r26, r28 + add r26, AS_SAVE3 + adc r27, r1 + ldi r30, pm_lo8(jump_table) + ldi r31,
pm_hi8(jump_table) + add r30, FI_SAVE + adc r31, r1 ; Z points to the correct entry in our jump table + ld ARG_Y0, X+ + ld ARG_Y1, X+ + ld ARG_Y2, X+ + ld ARG_Y3, X + + ijmp /* jumps to the round function selected by Z; it comes back via rjmp md5_core_F_exit */ +md5_core_F_exit: + + /* add ACCU to result of f() */ + add r22, ACCU0 + adc r23, ACCU1 + adc r24, ACCU2 + adc r25, ACCU3 + + /* rotate */ + mov r20, S_SAVE +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 /* 8 or more steps left: rotate by a whole byte through r21 */ + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: + mov r21, r25 /* copy of the top byte; its bits are fed into the low byte by lsl/rol below */ +bitrotl_loop: + tst r20 + breq fixrotl +bitrotl_loop2: + lsl r21 + rol r22 + rol r23 + rol r24 + rol r25 + dec r20 + brne bitrotl_loop2 +fixrotl: + + /* add a[(as+1)&3] */ + movw r26, r28 + add r26, AS_SAVE2 + adc r27, r1 + ld r0, X+ + add r22, r0 + ld r0, X+ + adc r23, r0 + ld r0, X+ + adc r24, r0 + ld r0, X + adc r25, r0 + + /* store result */ + movw r26, r28 + add r26, AS_SAVE0 + adc r27, r1 + st X+, r22 + st X+, r23 + st X+, r24 + st X , r25 +md5_core_exit: + pop_range 4, 8 + pop r17 + pop r16 + ret + +;################################################################### +/* +void md5_nextBlock(md5_ctx_t *state, void* block){ + uint32_t a[4]; + uint8_t m,n,i=0; + + a[0]=state->a[0]; + a[1]=state->a[1]; + a[2]=state->a[2]; + a[3]=state->a[3]; + + / * round 1 * / + uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2 + for(m=0;m<4;++m){ + for(n=0;n<4;++n){ + md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0); + } + } + / * round 2 * / + uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4 + for(m=0;m<4;++m){ + for(n=0;n<4;++n){ + md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1); + } + } + / * round 3 * / + uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1 + for(m=0;m<4;++m){ + for(n=0;n<4;++n){ + md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2); + } + } + / * round 4 * / + uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3 + for(m=0;m<4;++m){ + for(n=0;n<4;++n){ + md5_core(a,
&(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3); + } + } + state->a[0] += a[0]; + state->a[1] += a[1]; + state->a[2] += a[2]; + state->a[3] += a[3]; + state->counter++; +} +*/ + +shift_table_1: .byte 7,12,17,22 /* rotate amounts s for round 1, indexed by n */ +shift_table_2: .byte 5, 9,14,20 /* round 2 */ +shift_table_3: .byte 4,11,16,23 /* round 3 */ +shift_table_4: .byte 6,10,15,21 /* round 4 */ + +index_table_r2: +;byte offset into the block = 4*((1+m*4+n*5)&0xf), one row per m, one column per n: + .byte 0x04, 0x18, 0x2c, 0x00 + .byte 0x14, 0x28, 0x3c, 0x10 + .byte 0x24, 0x38, 0x0c, 0x20 + .byte 0x34, 0x08, 0x1c, 0x30 + +index_table_r3: +;byte offset into the block = 4*((5-m*4+n*3)&0xf), one row per m, one column per n: + .byte 0x14, 0x20, 0x2c, 0x38 + .byte 0x04, 0x10, 0x1c, 0x28 + .byte 0x34, 0x00, 0x0c, 0x18 + .byte 0x24, 0x30, 0x3c, 0x08 + +index_table_r4: +;byte offset into the block = 4*((0-m*4+n*7)&0xf), one row per m, one column per n: + .byte 0x00, 0x1c, 0x38, 0x14 + .byte 0x30, 0x0c, 0x28, 0x04 + .byte 0x20, 0x3c, 0x18, 0x34 + .byte 0x10, 0x2c, 0x08, 0x24 + +APTR_REG = 2 +BPTR_REG = 4 +N_REG = 6 +M_REG = 7 +I_REG = 8 +.global md5_nextBlock +md5_nextBlock: /* state pointer in r24:r25, block pointer in r22:r23 */ + stack_alloc 16 + push_range 2, 17 + push r28 + push r29 + push r24 + push r25 /* state pointer, popped into X after round 4 */ + adiw r30, 1 /* Z now points to the beginning of the allocated memory */ + movw r2, r30 /* APTR_REG: working copy a[0..3] on the stack */ + movw r4, r22 /* BPTR_REG: the message block */ + movw r26, r24 + ldi r20, 16 +1: + ld r0, X+ + st Z+, r0 + dec r20 + brne 1b + /* state now copied to stack memory */ + clr I_REG + /* Round 1 */ + clr M_REG + ldi r17, 4 +1: + clr N_REG + ldi r16, 4 +2: + movw r24, APTR_REG + movw r22, BPTR_REG + mov r0, M_REG + lsl r0 + lsl r0 + add r0, N_REG + lsl r0 + lsl r0 + add r22, r0 /* r22:r23 = block + 4*(4*m+n) */ + adc r23, r1 + mov r21, r16 /* as = 4-n */ + ldi r30, lo8(shift_table_1) + ldi r31, hi8(shift_table_1) + add r30, N_REG + adc r31, r1 + lpm r20, Z /* s */ + mov r19, I_REG /* i */ + ldi r18, 0 /* fi = 0 selects md5_F */ + rcall md5_core_asm + inc I_REG + inc N_REG + dec r16 + brne 2b + inc M_REG + dec r17 + brne 1b + + /* Round 2 */ + clr M_REG + ldi r17, 4 +1: + clr N_REG + ldi r16, 4 +2: + movw r24, APTR_REG + movw r22, BPTR_REG + ldi r30, lo8(index_table_r2) + ldi r31, hi8(index_table_r2) + mov r0, M_REG + lsl r0 + lsl r0 + add r0, N_REG + add r30, r0 + adc r31, r1 + lpm r0, Z /* byte offset of the message word for this step */ + add r22, r0 + adc r23, r1 + mov r21, r16 +
ldi r30, lo8(shift_table_2) + ldi r31, hi8(shift_table_2) + add r30, N_REG + adc r31, r1 + lpm r20, Z + mov r19, I_REG + ldi r18, 1 /* fi = 1 selects md5_G */ + rcall md5_core_asm + inc I_REG + inc N_REG + dec r16 + brne 2b + inc M_REG + dec r17 + brne 1b + + /* Round 3 */ + clr M_REG + ldi r17, 4 +1: + clr N_REG + ldi r16, 4 +2: + movw r24, APTR_REG + movw r22, BPTR_REG + ldi r30, lo8(index_table_r3) + ldi r31, hi8(index_table_r3) + mov r0, M_REG + lsl r0 + lsl r0 + add r0, N_REG + add r30, r0 + adc r31, r1 + lpm r0, Z + add r22, r0 + adc r23, r1 + mov r21, r16 + ldi r30, lo8(shift_table_3) + ldi r31, hi8(shift_table_3) + add r30, N_REG + adc r31, r1 + lpm r20, Z + mov r19, I_REG + ldi r18, 2 /* fi = 2 selects md5_H */ + rcall md5_core_asm + inc I_REG + inc N_REG + dec r16 + brne 2b + inc M_REG + dec r17 + brne 1b + + /* Round 4 */ + clr M_REG + ldi r17, 4 +1: + clr N_REG + ldi r16, 4 +2: + movw r24, APTR_REG + movw r22, BPTR_REG + ldi r30, lo8(index_table_r4) + ldi r31, hi8(index_table_r4) + mov r0, M_REG + lsl r0 + lsl r0 + add r0, N_REG + add r30, r0 + adc r31, r1 + lpm r0, Z + add r22, r0 + adc r23, r1 + mov r21, r16 + ldi r30, lo8(shift_table_4) + ldi r31, hi8(shift_table_4) + add r30, N_REG + adc r31, r1 + lpm r20, Z + mov r19, I_REG + ldi r18, 3 /* fi = 3 selects md5_I */ + rcall md5_core_asm + inc I_REG + inc N_REG + dec r16 + brne 2b + inc M_REG + dec r17 + brne 1b + + + pop r27 + pop r26 /* X now points to the context */ + movw r30, APTR_REG + ldi r16, 4 /* four 32-bit words: state->a[k] += a[k] */ +1: + ld r0, X + ld r2, Z+ /* r2 is free as scratch: APTR_REG was copied to Z above */ + add r0, r2 + st X+, r0 + ld r0, X + ld r2, Z+ + adc r0, r2 + st X+, r0 + ld r0, X + ld r2, Z+ + adc r0, r2 + st X+, r0 + ld r0, X + ld r2, Z+ + adc r0, r2 + st X+, r0 + dec r16 + brne 1b + + ld r0, X /* state->counter++ over four bytes; stop as soon as a byte does not wrap to zero */ + inc r0 + st X+, r0 + brne 2f + ld r0, X + inc r0 + st X+, r0 + brne 2f + ld r0, X + inc r0 + st X+, r0 + brne 2f + ld r0, X + inc r0 + st X+, r0 +2: + + pop r29 + pop r28 + pop_range 2, 17 + stack_free 16 + ret + +;############################################################################### +/* +void md5_lastBlock(md5_ctx_t *state, const void* block,
uint16_t length_b){ + uint16_t l; + uint8_t b[64]; + while (length_b >= 512){ + md5_nextBlock(state, block); + length_b -= 512; + block = ((uint8_t*)block) + 512/8; + } + memset(b, 0, 64); + memcpy(b, block, length_b/8); + / * insert padding one * / + l=length_b/8; + if(length_b%8){ + uint8_t t; + t = ((uint8_t*)block)[l]; + t |= (0x80>>(length_b%8)); + b[l]=t; + }else{ + b[l]=0x80; + } + / * insert length value * / + if(l+sizeof(uint64_t) >= 512/8){ + md5_nextBlock(state, b); + state->counter--; + memset(b, 0, 64-8); + } + *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b; + md5_nextBlock(state, b); +} +*/ +; state_ptr : r24,r25 +; block_ptr : r22,r23 +; length_b : r20,r21 +.global md5_lastBlock +md5_lastBlock: + stack_alloc_large 64 + push_range 12, 17 + push r30 + push r31 + movw r16, r20 /* length_b */ + movw r14, r22 /* block_ptr */ + movw r12, r24 /* state_ptr */ +2: + cpi r17, 2 /* hi8(512) */ + brlo 2f +1: + movw r24, r12 + movw r22, r14 + rcall md5_nextBlock + ldi r18, 64 /* must be loaded after the call: md5_nextBlock does not preserve r18 */ + add r14, r18 /* block_ptr += 64 */ + adc r15, r1 + subi r17, 2 /* length_b -= 512 */ + rjmp 2b +2: + pop r31 + pop r30 + + adiw r30, 1 /* adjust Z to point to buffer */ + movw r26, r14 + movw r24, r16 + adiw r24, 7 + + lsr r25 + ror r24 + lsr r25 + ror r24 + lsr r24 /* r24 now holds how many bytes are to copy */ + ldi r18, 64 + sub r18, r24 /* r18 counts the bytes of the buffer that are still free */ + tst r24 +4: + breq 5f + ld r0, X+ + st Z+, r0 + dec r24 + rjmp 4b /* Z points to the byte after msg in buffer */ +5: /* append 1-bit */ + mov r20, r16 + ldi r19, 0x80 + andi r20, 0x07 + brne bit_fucking + st Z+, r19 + dec r18 /* 'allocate' another byte in buffer */ + rjmp after_bit_fucking +bit_fucking: +1: + lsr r19 + dec r20 + brne 1b + or r0, r19 /* r0 still holds the last (partial) message byte copied above */ + st -Z, r0 + adiw r30, 1 +after_bit_fucking: + clt + cpi r18, 8 + brmi 2f + set /* store in t if the counter will also fit in this block (1 if fit)*/ +2: + tst r18 + breq 2f +1: /* fill remaining buffer with zeros */ + st Z+, r1 + dec r18 + brne 1b +2: + sbiw r30, 63 +
sbiw r30, 1 + movw r14, r30 /* r14:r15 now points to buffer */ + brts load_counter + /* counter does not fit, finalize this block */ + movw r24, r12 + movw r22, r14 + rcall md5_nextBlock + movw r30, r14 + ldi r20, 64-8 +3: + st Z+, r1 + dec r20 + brne 3b + +load_counter: + movw r26, r12 /* X points to state */ + adiw r26, 16 + ld r19, X+ + ld r20, X+ + ld r21, X+ + ld r22, X+ + brts post_counter_decrement /* do not decrement because counter fits */ +counter_decrement: + subi r19, 1 /* undo the increment done by the extra md5_nextBlock call above */ + sbci r20, 0 + sbci r21, 0 + sbci r22, 0 +post_counter_decrement: + clr r18 + clr r23 + lsl r19 /* r23..r19 = counter*2; placed one byte above r18 this is counter*512 */ + rol r20 + rol r21 + rol r22 + rol r23 + mov r18, r16 /* r16:r17 length_b */ + add r19, r17 /* high byte of length_b; it is below 2 after the loop at the top */ + adc r20, r1 + adc r21, r1 + adc r22, r1 + adc r23, r1 + movw r30, r14 + adiw r30, 64-8 + st Z+, r18 + st Z+, r19 + st Z+, r20 + st Z+, r21 + st Z+, r22 + st Z+, r23 + st Z+, r1 + st Z, r1 + + sbiw r30, 63 +; sbiw r30, 1 (not needed: the last store above did not post-increment Z) + movw r24, r12 + movw r22, r30 + rcall md5_nextBlock +md5_lastBlock_exit: + pop_range 12, 17 + stack_free_large 64 + ret + + +;############################################################################### + + +.global md5_ctx2hash +md5_ctx2hash: /* dest in r24:r25, state in r22:r23 */ + movw r26, r24 + movw r30, r22 + ldi r22, 16 /* copy the first 16 bytes of the state to dest */ +1: + ld r0, Z+ + st X+, r0 + dec r22 + brne 1b + ret + + +;############################################################################### + + +.global md5 +md5: /* dest in r24:r25, msg in r22:r23, 32-bit length in bits in r18..r21 */ + stack_alloc 20 + push_range 8, 17 + adiw r30, 1 + movw r8, r30 /* ctx */ + movw r10, r24 /* dest */ + movw r12, r22 /* msg */ + movw r14, r18 /* length (low) */ + movw r16, r20 /* length (high) */ + movw r24, r30 + rcall md5_init +1: + tst r16 /* while the upper 16 bits of the length are non-zero, hash one full block here */ + brne next_round + tst r17 + breq last_round +next_round: + movw r24, r8 + movw r22, r12 + rcall md5_nextBlock + ldi r22, 64 + add r12, r22 /* msg += 64 */ + adc r13, r1 + ldi r22, 2 + sub r15, r22 /* length -= 512 */ + sbci r16, 0 + sbci r17, 0 + rjmp 1b +last_round: + movw r24, r8 + movw r22, r12 + movw r20, r14 /* the remaining low 16 bits of the length */ + rcall md5_lastBlock + movw r24, r10 + movw r22, r8 + rcall md5_ctx2hash + pop_range 8, 17 + stack_free 20 + ret +
+ + diff --git a/md5.c b/md5/md5.c similarity index 100% rename from md5.c rename to md5/md5.c diff --git a/md5/md5.h b/md5/md5.h new file mode 100644 index 0000000..6b65c4a --- /dev/null +++ b/md5/md5.h @@ -0,0 +1,55 @@ +/* md5.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + * File: md5.h + * Author: Daniel Otte + * Date: 31.07.2006 + * License: GPL + * Description: Implementation of the MD5 hash algorithm as described in RFC 1321 + * + */ + + +#ifndef MD5_H_ +#define MD5_H_ + +#include <stdint.h> + + +#define MD5_HASH_BITS 128 +#define MD5_HASH_BYTES (MD5_HASH_BITS/8) +#define MD5_BLOCK_BITS 512 +#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8) + + +typedef struct md5_ctx_st { + uint32_t a[4]; /* 16 bytes of state; md5_ctx2hash copies exactly these bytes out as the hash */ + uint32_t counter; /* block counter: md5_lastBlock encodes counter*512 + length as the message bit length */ +} md5_ctx_t; + +typedef uint8_t md5_hash_t[MD5_HASH_BYTES]; + + +void md5_init(md5_ctx_t *s); +void md5_nextBlock(md5_ctx_t *state, const void* block); /* block: MD5_BLOCK_BYTES bytes */ +void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length); /* length is given in bits */ +void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state); +void md5(md5_hash_t* dest, const void* msg, uint32_t length_b); /* length_b is given in bits */ + +#endif /*MD5_H_*/ diff --git a/md5_sbox.h b/md5/md5_sbox.h similarity index 100% rename from md5_sbox.h rename to md5/md5_sbox.h diff --git a/mickey128.c b/mickey128/mickey128.c similarity index 100% rename from mickey128.c rename to mickey128/mickey128.c diff
--git a/mickey128.h b/mickey128/mickey128.h similarity index 100% rename from mickey128.h rename to mickey128/mickey128.h diff --git a/mkfiles/arcfour.mk b/mkfiles/arcfour.mk index d62c144..ebc6858 100644 --- a/mkfiles/arcfour.mk +++ b/mkfiles/arcfour.mk @@ -4,6 +4,7 @@ ALGO_NAME := ARCFOUR # comment out the following line for removement of ARCFOUR from the build process STREAM_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := arcfour/ $(ALGO_NAME)_OBJ := arcfour-asm.o $(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) \ nessie_stream_test.o nessie_common.o performance_test.o diff --git a/mkfiles/arcfour_c.mk b/mkfiles/arcfour_c.mk index 40a3a84..5bee9f0 100644 --- a/mkfiles/arcfour_c.mk +++ b/mkfiles/arcfour_c.mk @@ -4,10 +4,9 @@ ALGO_NAME := ARCFOUR_C # comment out the following line for removement of ARCFOUR from the build process STREAM_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := arcfour/ $(ALGO_NAME)_OBJ := arcfour.o -$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) \ - nessie_stream_test.o nessie_common.o \ - performance_test.o +$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) nessie_stream_test.o nessie_common.o performance_test.o $(ALGO_NAME)_NESSIE_TEST := "nessie" $(ALGO_NAME)_PERFORMANCE_TEST := "performance" diff --git a/mkfiles/base64.mk b/mkfiles/base64.mk index 58883c3..013b7ee 100644 --- a/mkfiles/base64.mk +++ b/mkfiles/base64.mk @@ -4,7 +4,7 @@ ALGO_NAME := BASE64 # comment out the following line for removement of base64 from the build process ENCODINGS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := base64/ $(ALGO_NAME)_OBJ := base64_enc.o base64_dec.o $(ALGO_NAME)_TEST_BIN := main-base64-test.o $(CLI_STD) \ performance_test.o noekeon_asm.o noekeon_prng.o memxor.o diff --git a/mkfiles/bmw_c.mk b/mkfiles/bmw_c.mk index 6305932..585bbb2 100644 --- a/mkfiles/bmw_c.mk +++ b/mkfiles/bmw_c.mk @@ -4,7 +4,7 @@ ALGO_NAME := BMW_C # comment out the following line for removement of BlueMidnightWish from the build process HASHES += $(ALGO_NAME) - 
+$(ALGO_NAME)_DIR := bmw/ $(ALGO_NAME)_OBJ := bmw_small.o bmw_large.o $(ALGO_NAME)_TEST_BIN := main-bmw-test.o hfal_bmw_small.o hfal_bmw_large.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := test nessie diff --git a/mkfiles/cast5.mk b/mkfiles/cast5.mk index 08ead6e..318a0e5 100644 --- a/mkfiles/cast5.mk +++ b/mkfiles/cast5.mk @@ -4,6 +4,7 @@ ALGO_NAME := CAST5 # comment out the following line for removement of CAST5 from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := cast5/ $(ALGO_NAME)_OBJ := cast5.o $(ALGO_NAME)_TEST_BIN := main-cast5-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/cast6.mk b/mkfiles/cast6.mk index 3f8539f..f28800a 100644 --- a/mkfiles/cast6.mk +++ b/mkfiles/cast6.mk @@ -4,7 +4,7 @@ ALGO_NAME := CAST6 # comment out the following line for removement of CAST6 from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := cast6/ $(ALGO_NAME)_OBJ := cast6.o $(ALGO_NAME)_TEST_BIN := main-cast6-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/des.mk b/mkfiles/des.mk index 5d9540d..180d9e1 100644 --- a/mkfiles/des.mk +++ b/mkfiles/des.mk @@ -4,6 +4,7 @@ ALGO_NAME := DES # comment out the following line for removement of DES from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := des/ $(ALGO_NAME)_OBJ := des.o $(ALGO_NAME)_TEST_BIN := main-des-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/entropium.mk b/mkfiles/entropium.mk index 02ad75b..e87b3de 100644 --- a/mkfiles/entropium.mk +++ b/mkfiles/entropium.mk @@ -4,6 +4,7 @@ ALGO_NAME := ENTROPIUM # comment out the following line for removement of PRNG from the build process PRNGS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := entropium/ $(ALGO_NAME)_OBJ := entropium.o sha256-asm.o $(ALGO_NAME)_TEST_BIN := main-entropium-test.o $(CLI_STD) performance_test.o diff --git a/mkfiles/grain.mk b/mkfiles/grain.mk index 
1b0da65..5e6638e 100644 --- a/mkfiles/grain.mk +++ b/mkfiles/grain.mk @@ -4,6 +4,7 @@ ALGO_NAME := GRAIN # comment out the following line for removement of Grain from the build process STREAM_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := grain/ $(ALGO_NAME)_OBJ := grain.o $(ALGO_NAME)_TEST_BIN := main-grain-test.o $(CLI_STD) \ nessie_stream_test.o nessie_common.o performance_test.o diff --git a/mkfiles/hmac-md5.mk b/mkfiles/hmac-md5.mk index 7d3f644..fd23627 100644 --- a/mkfiles/hmac-md5.mk +++ b/mkfiles/hmac-md5.mk @@ -4,9 +4,10 @@ ALGO_NAME := HMAC-MD5 # comment out the following line for removement of HMAC-MD5 from the build process MACS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := hmac-md5/ $(ALGO_NAME)_OBJ := hmac-md5.o md5-asm.o $(ALGO_NAME)_TEST_BIN := main-hmac-md5-test.o $(CLI_STD) \ - nessie_mac_test.o nessie_common.o base64_enc.o base64_dec.o + nessie_mac_test.o nessie_common.o $(ALGO_NAME)_NESSIE_TEST := "nessie" $(ALGO_NAME)_PERFORMANCE_TEST := "performance" diff --git a/mkfiles/hmac-sha1.mk b/mkfiles/hmac-sha1.mk index 9087400..40958a7 100644 --- a/mkfiles/hmac-sha1.mk +++ b/mkfiles/hmac-sha1.mk @@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA1 # comment out the following line for removement of HMAC-SHA1 from the build process MACS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := hmac-sha1/ $(ALGO_NAME)_OBJ := hmac-sha1.o sha1-asm.o $(ALGO_NAME)_TEST_BIN := main-hmac-sha1-test.o $(CLI_STD) \ nessie_mac_test.o nessie_common.o diff --git a/mkfiles/hmac-sha256.mk b/mkfiles/hmac-sha256.mk index ba221ab..4b25ea9 100644 --- a/mkfiles/hmac-sha256.mk +++ b/mkfiles/hmac-sha256.mk @@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA256 # comment out the following line for removement of HMAC-SHA256 from the build process MACS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := hmac-sha256/ $(ALGO_NAME)_OBJ := hmac-sha256.o sha256-asm.o $(ALGO_NAME)_TEST_BIN := main-hmac-sha256-test.o $(CLI_STD) \ nessie_mac_test.o nessie_common.o diff --git a/mkfiles/md5.mk b/mkfiles/md5.mk index 025e9eb..bebdaa3 100644 --- 
a/mkfiles/md5.mk +++ b/mkfiles/md5.mk @@ -4,6 +4,7 @@ ALGO_NAME := MD5 # comment out the following line for removement of MD5 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := md5/ $(ALGO_NAME)_OBJ := md5-asm.o $(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/md5_c.mk b/mkfiles/md5_c.mk index 7b6bb1a..d7421e7 100644 --- a/mkfiles/md5_c.mk +++ b/mkfiles/md5_c.mk @@ -4,6 +4,7 @@ ALGO_NAME := MD5_C # comment out the following line for removement of MD5 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := md5/ $(ALGO_NAME)_OBJ := md5.o $(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/mickey128.mk b/mkfiles/mickey128.mk index 6c95c96..c7bc17d 100644 --- a/mkfiles/mickey128.mk +++ b/mkfiles/mickey128.mk @@ -4,6 +4,7 @@ ALGO_NAME := MICKEY128 # comment out the following line for removement of Mickey128 from the build process STREAM_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := mickey128/ $(ALGO_NAME)_OBJ := mickey128.o $(ALGO_NAME)_TEST_BIN := main-mickey128-test.o $(CLI_STD) \ nessie_stream_test.o nessie_common.o diff --git a/mkfiles/present.mk b/mkfiles/present.mk index 3a4012c..3c73f82 100644 --- a/mkfiles/present.mk +++ b/mkfiles/present.mk @@ -4,7 +4,7 @@ ALGO_NAME := PRESENT # comment out the following line for removement of present from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := present/ $(ALGO_NAME)_OBJ := present.o $(ALGO_NAME)_TEST_BIN := main-present-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/rc5.mk b/mkfiles/rc5.mk index 3a5f128..be82175 100644 --- a/mkfiles/rc5.mk +++ b/mkfiles/rc5.mk @@ -4,7 +4,7 @@ ALGO_NAME := RC5 # comment out the following line for removement of RC5 from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := rc5/ $(ALGO_NAME)_OBJ := rc5.o 
$(ALGO_NAME)_TEST_BIN := main-rc5-test.o $(CLI_STD) nessie_bc_test.o \ nessie_common.o performance_test.o diff --git a/mkfiles/rc6.mk b/mkfiles/rc6.mk index e0bc603..a58b138 100644 --- a/mkfiles/rc6.mk +++ b/mkfiles/rc6.mk @@ -4,7 +4,7 @@ ALGO_NAME := RC6 # comment out the following line for removement of RC6 from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := rc6/ $(ALGO_NAME)_OBJ := rc6.o $(ALGO_NAME)_TEST_BIN := main-rc6-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/seed.mk b/mkfiles/seed.mk index 1c2b605..07ceb3e 100644 --- a/mkfiles/seed.mk +++ b/mkfiles/seed.mk @@ -4,6 +4,7 @@ ALGO_NAME := SEED # comment out the following line for removement of SEED from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := seed/ $(ALGO_NAME)_OBJ := seed-asm.o $(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/seed_C.mk b/mkfiles/seed_C.mk index 14aa8ac..7c77666 100644 --- a/mkfiles/seed_C.mk +++ b/mkfiles/seed_C.mk @@ -4,6 +4,7 @@ ALGO_NAME := SEED_C # comment out the following line for removement of SEED from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := seed/ $(ALGO_NAME)_OBJ := seed_C.o $(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/serpent-bitslice.mk b/mkfiles/serpent-bitslice.mk index 6a9b76a..5719143 100644 --- a/mkfiles/serpent-bitslice.mk +++ b/mkfiles/serpent-bitslice.mk @@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_BITSLICE # comment out the following line for removement of serpent from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := serpent/ $(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-bitslice-asm.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/serpent_asm_bitslice.mk 
b/mkfiles/serpent_asm_bitslice.mk index afd0868..a5956c3 100644 --- a/mkfiles/serpent_asm_bitslice.mk +++ b/mkfiles/serpent_asm_bitslice.mk @@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_BITSLICE # comment out the following line for removement of serpent from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := serpent/ $(ALGO_NAME)_OBJ := serpent-sboxes-bitslice-asm.o serpent-asm.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/serpent_asm_fast.mk b/mkfiles/serpent_asm_fast.mk index 3e3a4fb..d9ff725 100644 --- a/mkfiles/serpent_asm_fast.mk +++ b/mkfiles/serpent_asm_fast.mk @@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_FAST # comment out the following line for removement of serpent from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := serpent/ $(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-fast.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/serpent_asm_small.mk b/mkfiles/serpent_asm_small.mk index 6dde94b..4d6750e 100644 --- a/mkfiles/serpent_asm_small.mk +++ b/mkfiles/serpent_asm_small.mk @@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_SMALL # comment out the following line for removement of serpent from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := serpent/ $(ALGO_NAME)_OBJ := serpent-asm.o serpent-sboxes-small.o memxor.o $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/serpent_c.mk b/mkfiles/serpent_c.mk index dd3a69b..f52ced4 100644 --- a/mkfiles/serpent_c.mk +++ b/mkfiles/serpent_c.mk @@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_C # comment out the following line for removement of serpent from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := serpent/ $(ALGO_NAME)_OBJ := serpent.o serpent-sboxes_c.o memxor.o $(ALGO_NAME)_TEST_BIN := 
main-serpent-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/sha1.mk b/mkfiles/sha1.mk index 45df051..c986aeb 100644 --- a/mkfiles/sha1.mk +++ b/mkfiles/sha1.mk @@ -4,6 +4,7 @@ ALGO_NAME := SHA1 # comment out the following line for removement of SHA1 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := sha1/ $(ALGO_NAME)_OBJ := sha1-asm.o $(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o $(CLI_STD) $(HFAL_STD) dump-decl.o dump-asm.o $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/sha1_c.mk b/mkfiles/sha1_c.mk index 4b0b7ae..6998cbc 100644 --- a/mkfiles/sha1_c.mk +++ b/mkfiles/sha1_c.mk @@ -4,6 +4,7 @@ ALGO_NAME := SHA1_C # comment out the following line for removement of SHA1 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := sha1/ $(ALGO_NAME)_OBJ := sha1.o $(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o dump-asm.o dump-decl.o $(CLI_STD) $(HFAL_STD) $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/sha256.mk b/mkfiles/sha256.mk index 93ebdc7..9e56ed6 100644 --- a/mkfiles/sha256.mk +++ b/mkfiles/sha256.mk @@ -4,6 +4,7 @@ ALGO_NAME := SHA256 # comment out the following line for removement of SHA256 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := sha256/ $(ALGO_NAME)_OBJ := sha256-asm.o $(ALGO_NAME)_TEST_BIN := main-sha256-test.o dump-asm.o dump-decl.o hfal_sha256.o $(CLI_STD) $(HFAL_STD) diff --git a/mkfiles/sha256_c.mk b/mkfiles/sha256_c.mk index d52fe88..6c58677 100644 --- a/mkfiles/sha256_c.mk +++ b/mkfiles/sha256_c.mk @@ -4,6 +4,7 @@ ALGO_NAME := SHA256_C # comment out the following line for removement of SHA256 from the build process HASHES += $(ALGO_NAME) +$(ALGO_NAME)_DIR := sha256/ $(ALGO_NAME)_OBJ := sha256.o $(ALGO_NAME)_TEST_BIN := main-sha256-test.o $(CLI_STD) $(HFAL_STD) hfal_sha256.o dump-asm.o dump-decl.o $(ALGO_NAME)_NESSIE_TEST := "nessie" diff --git a/mkfiles/shabea.mk b/mkfiles/shabea.mk index e5f4a68..9c05cb3 100644 --- 
a/mkfiles/shabea.mk +++ b/mkfiles/shabea.mk @@ -4,6 +4,7 @@ ALGO_NAME := SHABEA # comment out the following line for removement of SHABEA from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := shabea/ $(ALGO_NAME)_OBJ := shabea.o sha256-asm.o memxor.o $(ALGO_NAME)_TEST_BIN := main-shabea-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/shacal1enc.mk b/mkfiles/shacal1enc.mk index 63be6c8..3003638 100644 --- a/mkfiles/shacal1enc.mk +++ b/mkfiles/shacal1enc.mk @@ -1,10 +1,10 @@ -# Makefile for present +# Makefile for shacal1 ALGO_NAME := SHACAL1ENC -# comment out the following line for removement of present from the build process +# comment out the following line for removement of shacal1 from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := shacal1/ $(ALGO_NAME)_OBJ := shacal1_enc.o sha1-asm.o $(ALGO_NAME)_TEST_BIN := main-shacal1_enc-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/shacal2enc.mk b/mkfiles/shacal2enc.mk index e8a91a2..d5f0d9d 100644 --- a/mkfiles/shacal2enc.mk +++ b/mkfiles/shacal2enc.mk @@ -1,10 +1,10 @@ -# Makefile for present +# Makefile for shacal2 ALGO_NAME := SHACAL2ENC -# comment out the following line for removement of present from the build process +# comment out the following line for removement of shacal2 from the build process BLOCK_CIPHERS += $(ALGO_NAME) - +$(ALGO_NAME)_DIR := shacal2/ $(ALGO_NAME)_OBJ := shacal2_enc.o sha256-asm.o $(ALGO_NAME)_TEST_BIN := main-shacal2_enc-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/skipjack.mk b/mkfiles/skipjack.mk index 56579b2..ef859df 100644 --- a/mkfiles/skipjack.mk +++ b/mkfiles/skipjack.mk @@ -4,6 +4,7 @@ ALGO_NAME := SKIPJACK # comment out the following line for removement of skipjack from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := skipjack/ $(ALGO_NAME)_OBJ := skipjack.o $(ALGO_NAME)_TEST_BIN 
:= main-skipjack-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/tdes.mk b/mkfiles/tdes.mk index 7bdc41e..09bb96c 100644 --- a/mkfiles/tdes.mk +++ b/mkfiles/tdes.mk @@ -1,9 +1,10 @@ -# Makefile for DES +# Makefile for Triple-DES ALGO_NAME := TDES -# comment out the following line for removement of DES from the build process +# comment out the following line for removement of Triple-DES from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := des/ $(ALGO_NAME)_OBJ := des.o $(ALGO_NAME)_TEST_BIN := main-tdes-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/trivium.mk b/mkfiles/trivium.mk index bbfba3b..a668ee7 100644 --- a/mkfiles/trivium.mk +++ b/mkfiles/trivium.mk @@ -4,6 +4,7 @@ ALGO_NAME := TRIVIUM # comment out the following line for removement of Trivium from the build process STREAM_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := trivium/ $(ALGO_NAME)_OBJ := trivium.o $(ALGO_NAME)_TEST_BIN := main-trivium-test.o $(CLI_STD) \ nessie_stream_test.o nessie_common.o performance_test.o diff --git a/mkfiles/xtea.mk b/mkfiles/xtea.mk index 68adcfb..f2d1169 100644 --- a/mkfiles/xtea.mk +++ b/mkfiles/xtea.mk @@ -4,6 +4,7 @@ ALGO_NAME := XTEA # comment out the following line for removement of XTEA from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := xtea/ $(ALGO_NAME)_OBJ := xtea-asm.o $(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/mkfiles/xtea_c.mk b/mkfiles/xtea_c.mk index dec8f8c..5bbd680 100644 --- a/mkfiles/xtea_c.mk +++ b/mkfiles/xtea_c.mk @@ -4,6 +4,7 @@ ALGO_NAME := XTEA_C # comment out the following line for removement of XTEA from the build process BLOCK_CIPHERS += $(ALGO_NAME) +$(ALGO_NAME)_DIR := xtea/ $(ALGO_NAME)_OBJ := xtea.o $(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \ nessie_bc_test.o nessie_common.o performance_test.o diff --git a/present.c 
b/present/present.c similarity index 100% rename from present.c rename to present/present.c diff --git a/present.h b/present/present.h similarity index 100% rename from present.h rename to present/present.h diff --git a/rc5.c b/rc5/rc5.c similarity index 100% rename from rc5.c rename to rc5/rc5.c diff --git a/rc5.h b/rc5/rc5.h similarity index 100% rename from rc5.h rename to rc5/rc5.h diff --git a/rc6.c b/rc6/rc6.c similarity index 100% rename from rc6.c rename to rc6/rc6.c diff --git a/rc6.h b/rc6/rc6.h similarity index 100% rename from rc6.h rename to rc6/rc6.h diff --git a/seed-asm.S b/seed/seed-asm.S similarity index 100% rename from seed-asm.S rename to seed/seed-asm.S diff --git a/seed.h b/seed/seed.h similarity index 100% rename from seed.h rename to seed/seed.h diff --git a/seed_C.c b/seed/seed_C.c similarity index 100% rename from seed_C.c rename to seed/seed_C.c diff --git a/seed_sbox.h b/seed/seed_sbox.h similarity index 100% rename from seed_sbox.h rename to seed/seed_sbox.h diff --git a/serpent/memxor.S b/serpent/memxor.S new file mode 100644 index 0000000..a32058b --- /dev/null +++ b/serpent/memxor.S @@ -0,0 +1,66 @@ +/* memxor.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +/* + * File: memxor.S + * Author: Daniel Otte + * Date: 2008-08-07 + * License: GPLv3 or later + * Description: memxor, XORing one block into another + * + */ + +/* + * void memxor(void* dest, const void* src, uint16_t n); + */ + /* + * param dest is passed in r24:r25 + * param src is passed in r22:r23 + * param n is passed in r20:r21 + */ +.global memxor +memxor: + movw r30, r24 /* Z = dest */ + movw r26, r22 /* X = src */ + movw r24, r20 /* r24:r25 = n, the remaining byte count */ + adiw r24, 0 /* sets the zero flag when n == 0 */ + breq 2f /* nothing to do for n == 0 */ +1: + ld r20, X+ + ld r21, Z + eor r20, r21 + st Z+, r20 /* dest[i] ^= src[i] */ + sbiw r24, 1 + brne 1b +2: + ret + + + + + + + + + + + + + + diff --git a/serpent/memxor.h b/serpent/memxor.h new file mode 100644 index 0000000..a62a616 --- /dev/null +++ b/serpent/memxor.h @@ -0,0 +1,7 @@ +#ifndef MEMXOR_H_ +#define MEMXOR_H_ +#include <stdint.h> + +void memxor(void* dest, const void* src, uint16_t n); + +#endif diff --git a/serpent-asm.S b/serpent/serpent-asm.S similarity index 100% rename from serpent-asm.S rename to serpent/serpent-asm.S diff --git a/serpent-sboxes-bitslice-asm.S b/serpent/serpent-sboxes-bitslice-asm.S similarity index 100% rename from serpent-sboxes-bitslice-asm.S rename to serpent/serpent-sboxes-bitslice-asm.S diff --git a/serpent-sboxes-bitslice.c b/serpent/serpent-sboxes-bitslice.c similarity index 100% rename from serpent-sboxes-bitslice.c rename to serpent/serpent-sboxes-bitslice.c diff --git a/serpent-sboxes-fast.S b/serpent/serpent-sboxes-fast.S similarity index 100% rename from serpent-sboxes-fast.S rename to serpent/serpent-sboxes-fast.S diff --git a/serpent-sboxes-small.S b/serpent/serpent-sboxes-small.S similarity index 100% rename from serpent-sboxes-small.S rename to serpent/serpent-sboxes-small.S diff --git a/serpent-sboxes.h b/serpent/serpent-sboxes.h similarity index 100% rename from serpent-sboxes.h rename to serpent/serpent-sboxes.h diff --git a/serpent-sboxes_c.c b/serpent/serpent-sboxes_c.c similarity index 100% rename from serpent-sboxes_c.c rename to serpent/serpent-sboxes_c.c diff --git a/serpent.c b/serpent/serpent.c
similarity index 100% rename from serpent.c rename to serpent/serpent.c diff --git a/serpent.h b/serpent/serpent.h similarity index 100% rename from serpent.h rename to serpent/serpent.h diff --git a/sha1/sha1-asm.S b/sha1/sha1-asm.S new file mode 100644 index 0000000..f571685 --- /dev/null +++ b/sha1/sha1-asm.S @@ -0,0 +1,886 @@ +/* sha1-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; SHA1 implementation in assembler for AVR +SHA1_BLOCK_BITS = 512 +SHA1_HASH_BITS = 160 + +.macro precall + /* push r0, r1, r18 - r27, r30 - r31; r1 is cleared afterwards */ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 /* restores, in reverse order, everything precall pushed */ + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 /* X is handed to uart_hexdump in r24:r25 */ +.if \length > 16 + ldi r22, lo8(16) /* dump 16 bytes, then expand the macro again for the rest */ + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +.macro delay +/* + push r0 + push r1 + clr r0 +1: clr r1 +2: dec r1 + brne 2b + dec r0 + brne 1b + pop r1 + pop r0 // */ +.endm + +/* X points to Block */ +.macro dbg_hexdump length +/* + precall + hexdump \length + postcall + // */ +.endm + + + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha1_ctx_t is: +; +; [h0][h1][h2][h3][h4][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha1_ctx2hash +; === sha1_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha1_ctx structure +; given in r23,r22 +sha1_ctx2hash: + movw r26, r22 /* X = ctx */ + movw r30, r24 /* Z = dest */ + ldi r21, 5 /* five 32-bit words */ + sbiw r26, 4 /* bias X so the adiw below lands one past the end of the current word */ +1: + ldi r20, 4 + adiw r26, 8 /* X = one past the end of the next word */ +2: + ld r0, -X /* read the word from its last byte down to its first ... */ + st Z+, r0 /* ... and store ascending: byte order of each word is reversed */ + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha1 +; === sha1 === +;
this function calculates SHA-1 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha1: +sha1_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r16, SPL + in r17, SPH + subi r16, 5*4+8 /* reserve 28 bytes of stack for the context: 5 state words + 64-bit length */ + sbci r17, 0 + in r0, SREG + cli + out SPL, r16 + out SPH, r17 + out SREG, r0 + + push r25 /* destination pointer is needed again for sha1_ctx2hash */ + push r24 + subi r16, lo8(-1) /* r16:r17 = reserved area start (SP+1); subi/sbci because inc does not update the carry flag */ + sbci r17, hi8(-1) + + movw r8, r18 /* backup of length */ + movw r10, r20 + + movw r12, r22 /* backup of msg-ptr */ + + movw r24, r16 + rcall sha1_init + /* while length >= 512: hash one full block */ +1: + tst r11 + brne 2f /* length >= 2^24 bits: a full block is left */ + tst r10 + brne 2f /* length >= 2^16 bits: a full block is left */ + mov r19, r9 + cpi r19, 0x02 + brlo 4f /* fewer than 512 bits left: sha1_lastBlock does the rest */ +2: + movw r24, r16 + movw r22, r12 + rcall sha1_nextBlock + ldi r19, 64 /* advance the saved message pointer by one block (64 bytes) */ + add r12, r19 + adc r13, r1 /* r1 is expected to be zero here */ + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 /* remaining length, now below 512 bits */ + rcall sha1_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha1_ctx2hash + +sha1_epilog: + in r30, SPL + in r31, SPH + adiw r30, 5*4+8 /* release the context */ + in r0, SREG + cli + out SPL, r30 + out SPH, r31 + out SREG, r0 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha1_lastBlock +; === sha1_lastBlock === +; this function does padding & Co.
for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: a 16-bit pointer to 64 byte block to hash +; given in r23,r22 +; param3: a 16-bit integer specifying length of block in bits +; given in r21,r20 +sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1) + +sha1_lastBlock: + cpi r21, 0x02 /* length >= 512 bits? then hash one full block first */ + brlo sha1_lastBlock_prolog + push r25 /* save all three arguments across the call */ + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha1_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 2 /* length -= 512 bits */ + subi r22, lo8(-64) /* block pointer += 64 bytes (= 512 bits) */ + sbci r23, hi8(-64) + rjmp sha1_lastBlock +sha1_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r1, SREG /* r1 is used as scratch for SREG here, so it is not zero afterwards */ + subi r30, lo8(64) + sbci r31, hi8(64) /* ??? */ + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* bit 8 of the bit length (r21 bit 0) becomes bit 5 of the byte count: 256/8 == 32 */ + bld r18, 5 /* now: r18 == length/8 (aka.
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha1_lastBlock_post_copy + mov r1, r18 +sha1_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha1_lastBlock_copy_loop +sha1_lastBlock_post_copy: +sha1_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha1_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha1_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*5+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha1_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha1_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha1_lastBlock_epilog +sha1_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 5*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha1_nextBlock + +sha1_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r1, SREG + adiw r30, 63 ; lo8(64) + adiw 
r30, 1 ; hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + clr r1 + clr r0 + ret + +/**/ +;########################################################### + +.global sha1_nextBlock +; === sha1_nextBlock === +; this is the core function for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte) + +xtmp = 0 +xNULL = 1 +W1 = 10 +W2 = 11 +T1 = 12 +T2 = 13 +T3 = 14 +T4 = 15 +LoopC = 16 +S = 17 +tmp1 = 18 +tmp2 = 19 +tmp3 = 20 +tmp4 = 21 +F1 = 22 +F2 = 23 +F3 = 24 +F4 = 25 + +/* byteorder: high number <--> high significance */ +sha1_nextBlock: + ; initial, let's make some space ready for local vars + /* replace push & pop by mem ops? */ + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack /* maybe removeable? 
*/ + movw r30, r22 ; Z points to message + subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha1_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + + push r18 + push r19 /* push old SP on new stack */ + push r24 + push r25 /* param1 will be needed later */ + + /* load a[] with state */ + movw 28, r24 /* load pointer to state in Y */ + adiw r26, 1 ; X++ + + ldi LoopC, 5*4 +1: ld tmp1, Y+ + st X+, tmp1 + dec LoopC + brne 1b + + movw W1, r26 /* save pointer to w[0] */ + /* load w[] with endian fixed message */ + /* we might also use the changeendian32() function at bottom */ + movw r30, r22 /* mv param2 (ponter to msg) to Z */ + ldi LoopC, 16 +1: + ldd tmp1, Z+3 + st X+, tmp1 + ldd tmp1, Z+2 + st X+, tmp1 + ldd tmp1, Z+1 + st X+, tmp1 + ld tmp1, Z + st X+, tmp1 + adiw r30, 4 + dec LoopC + brne 1b + + ;clr LoopC /* LoopC is named t in FIPS 180-2 */ + clr xtmp +sha1_nextBlock_mainloop: + mov S, LoopC + lsl S + lsl S + andi S, 0x3C /* S is a bytepointer so *4 */ + /* load w[s] */ + movw r26, W1 + add r26, S /* X points at w[s] */ + adc r27, xNULL + ld T1, X+ + ld T2, X+ + ld T3, X+ + ld T4, X+ + + /**/ + push r26 + push r27 + push T4 + push T3 + push T2 + push T1 + in r26, SPL + in r27, SPH + adiw r26, 1 + dbg_hexdump 4 + pop T1 + pop T2 + pop T3 + pop T4 + pop r27 + pop r26 + /**/ + + cpi LoopC, 16 + brlt sha1_nextBlock_mainloop_core + /* update w[s] */ + ldi tmp1, 2*4 + rcall 1f + ldi tmp1, 8*4 + rcall 1f + ldi tmp1, 13*4 + rcall 1f + rjmp 2f +1: /* this might be "outsourced" to save the jump above */ + add tmp1, S + andi tmp1, 0x3f + movw r26, W1 + add r26, tmp1 + adc r27, xNULL + ld tmp2, X+ + eor T1, tmp2 + ld tmp2, X+ + eor T2, tmp2 + ld tmp2, X+ + eor T3, tmp2 + ld tmp2, X+ + eor T4, tmp2 + ret +2: /* now we just hav to do a ROTL(T) and save T back */ + mov tmp2, T4 + rol tmp2 + rol T1 + rol T2 + 
rol T3 + rol T4 + movw r26, W1 + add r26, S + adc r27, xNULL + st X+, T1 + st X+, T2 + st X+, T3 + st X+, T4 + +sha1_nextBlock_mainloop_core: /* ther core function; T=ROTL5(a) ....*/ + /* T already contains w[s] */ + movw r26, W1 + sbiw r26, 4*1 /* X points at a[4] aka e */ + ld tmp1, X+ + add T1, tmp1 + ld tmp1, X+ + adc T2, tmp1 + ld tmp1, X+ + adc T3, tmp1 + ld tmp1, X+ + adc T4, tmp1 /* T = w[s]+e */ + sbiw r26, 4*5 /* X points at a[0] aka a */ + ld F1, X+ + ld F2, X+ + ld F3, X+ + ld F4, X+ + mov tmp1, F4 /* X points at a[1] aka b */ + ldi tmp2, 5 +1: + rol tmp1 + rol F1 + rol F2 + rol F3 + rol F4 + dec tmp2 + brne 1b + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, F4 /* T = ROTL(a,5) + e + w[s] */ + + /* now we have to do this fucking conditional stuff */ + ldi r30, lo8(sha1_nextBlock_xTable) + ldi r31, hi8(sha1_nextBlock_xTable) + add r30, xtmp + adc r31, xNULL + lpm tmp1, Z + cp tmp1, LoopC + brne 1f + inc xtmp +1: ldi r30, lo8(sha1_nextBlock_KTable) + ldi r31, hi8(sha1_nextBlock_KTable) + lsl xtmp + lsl xtmp + add r30, xtmp + adc r31, xNULL + lsr xtmp + lsr xtmp + + lpm tmp1, Z+ + add T1, tmp1 + lpm tmp1, Z+ + adc T2, tmp1 + lpm tmp1, Z+ + adc T3, tmp1 + lpm tmp1, Z+ + adc T4, tmp1 + /* T = ROTL(a,5) + e + kt + w[s] */ + + /* Z-4 is just pointing to kt ... 
*/ + movw r28, r26 /* copy X in Y */ + adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */ + lsr r31 + ror r30 + + icall + mov F1, tmp1 + icall + mov F2, tmp1 + icall + mov F3, tmp1 + icall + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */ + /* X points still at a[1] aka b, Y points at a[2] aka c */ + /* update a[] */ +sha1_nextBlock_update_a: + /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/ + //adiw r28, 3*4 /* Y should point at a[4] aka e */ + movw r28, W1 + sbiw r28, 4 + + ldi tmp2, 4*4 +1: + ld tmp1, -Y + std Y+4, tmp1 + dec tmp2 + brne 1b + /* Y points at a[0] aka a*/ + + movw r28, W1 + sbiw r28, 5*4 + /* store T in a[0] aka a */ + st Y+, T1 + st Y+, T2 + st Y+, T3 + st Y+, T4 + /* Y points at a[1] aka b*/ + + /* rotate c */ + ldd T1, Y+1*4 + ldd T2, Y+1*4+1 + ldd T3, Y+1*4+2 + ldd T4, Y+1*4+3 + mov tmp1, T1 + ldi tmp2, 2 +1: ror tmp1 + ror T4 + ror T3 + ror T2 + ror T1 + dec tmp2 + brne 1b + std Y+1*4+0, T1 + std Y+1*4+1, T2 + std Y+1*4+2, T3 + std Y+1*4+3, T4 + + push r27 + push r26 + movw r26, W1 + sbiw r26, 4*5 + dbg_hexdump 4*5 + pop r26 + pop r27 + + inc LoopC + cpi LoopC, 80 + brge 1f + rjmp sha1_nextBlock_mainloop +/**************************************/ +1: + /* littel patch */ + sbiw r28, 4 + +/* add a[] to state and inc length */ + pop r27 + pop r26 /* now X points to state (and Y still at a[0]) */ + ldi tmp4, 5 +1: clc + ldi tmp3, 4 +2: ld tmp1, X + ld tmp2, Y+ + adc tmp1, tmp2 + st X+, tmp1 + dec tmp3 + brne 2b + dec tmp4 + brne 1b + + /* now length += 512 */ + adiw r26, 1 /* we skip the least significant byte */ + ld tmp1, X + ldi tmp2, hi8(512) /* 2 */ + add tmp1, tmp2 + st X+, tmp1 + ldi tmp2, 6 +1: + ld tmp1, X + adc tmp1, xNULL + st X+, tmp1 + dec tmp2 + brne 1b + +; EPILOG +sha1_nextBlock_epilog: +/* now we should clean up the stack */ + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out 
SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret + +/* round numbers at which the main loop advances xtmp, i.e. where the round constant and the round function change */ +sha1_nextBlock_xTable: +.byte 20,40,60,0 +/* the four 32-bit round constants, selected by xtmp in the main loop */ +sha1_nextBlock_KTable: +.int 0x5a827999 +.int 0x6ed9eba1 +.int 0x8f1bbcdc +.int 0xca62c1d6 +/* one 4-byte entry (rjmp + nop) per xtmp value; the main loop reaches it with icall, once per byte of the word */ +sha1_nextBlock_JumpTable: +rjmp sha1_nextBlock_Ch + nop +rjmp sha1_nextBlock_Parity + nop +rjmp sha1_nextBlock_Maj + nop +rjmp sha1_nextBlock_Parity + + /* X and Y still point at a[1] aka b ; return value in tmp1 */ + /* each of the three helpers handles ONE byte and advances Y by one, so after Y+ the offsets 3 and 7 address the same byte of c and d */ +/* Ch(b,c,d) = (b & c) ^ (~b & d) */ +sha1_nextBlock_Ch: + ld tmp1, Y+ + mov tmp2, tmp1 + com tmp2 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp3, Y+7 /* load from d */ + and tmp2, tmp3 + eor tmp1, tmp2 + ret + +/* Maj(b,c,d) = (b & c) ^ (b & d) ^ (c & d) */ +sha1_nextBlock_Maj: + ld tmp1, Y+ + mov tmp2, tmp1 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp4, Y+7 /* load from d */ + and tmp2, tmp4 + eor tmp1, tmp2 + and tmp3, tmp4 + eor tmp1, tmp3 + ret + +/* Parity(b,c,d) = b ^ c ^ d */ +sha1_nextBlock_Parity: + ld tmp1, Y+ + ldd tmp2, Y+3 /* load from c */ + eor tmp1, tmp2 + ldd tmp2, Y+7 /* load from d */ + eor tmp1, tmp2 + ret +/* +ch_str: .asciz "\r\nCh" +maj_str: .asciz "\r\nMaj" +parity_str: .asciz "\r\nParity" +*/ +;########################################################### + +.global sha1_init +;void sha1_init(sha1_ctx_t *state){ +; DEBUG_S("\r\nSHA1_INIT"); +; state->h[0] = 0x67452301; +; state->h[1] = 0xefcdab89; +; state->h[2] = 0x98badcfe; +; state->h[3] = 0x10325476; +; state->h[4] = 0xc3d2e1f0; +; state->length = 0; +;} +; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram +; modifies: X(r26,r27), Z(r30,r31), r22, r23 +sha1_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha1_init_vector)) + ldi r31, hi8((sha1_init_vector)) + ldi r22, 5*4 /* bytes to copy */ +sha1_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne sha1_init_vloop + ldi r22, 8 /* the 8 bytes of state->length follow h[]; they are written from r1, which is not cleared here and is relied upon to be zero on entry */ +sha1_init_lloop: + st X+, r1 + dec r22 + brne sha1_init_lloop + ret + +sha1_init_vector: +.int 0x67452301; +.int 0xefcdab89; +.int 0x98badcfe; +.int 0x10325476; +.int 
0xc3d2e1f0; + diff --git a/sha1.c b/sha1/sha1.c similarity index 100% rename from sha1.c rename to sha1/sha1.c diff --git a/sha1/sha1.h b/sha1/sha1.h new file mode 100644 index 0000000..6675d20 --- /dev/null +++ b/sha1/sha1.h @@ -0,0 +1,117 @@ +/* sha1.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/** + * \file sha1.h + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2006-10-08 + * \license GPLv3 or later + * \brief SHA-1 declaration. 
+ * \ingroup SHA-1 + * + */ + +#ifndef SHA1_H_ +#define SHA1_H_ + +#include <stdint.h> +/** \def SHA1_HASH_BITS + * defines the size of a SHA-1 hash in bits + */ + +/** \def SHA1_HASH_BYTES + * defines the size of a SHA-1 hash in bytes + */ + +/** \def SHA1_BLOCK_BITS + * defines the size of a SHA-1 input block in bits + */ + +/** \def SHA1_BLOCK_BYTES + * defines the size of a SHA-1 input block in bytes + */ +#define SHA1_HASH_BITS 160 +#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8) +#define SHA1_BLOCK_BITS 512 +#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8) + +/** \typedef sha1_ctx_t + * \brief SHA-1 context type + * + * A variable of this type may hold the state of a SHA-1 hashing process + */ +typedef struct { + uint32_t h[5]; + uint64_t length; +} sha1_ctx_t; + +/** \typedef sha1_hash_t + * \brief hash value type + * A variable of this type may hold a SHA-1 hash value + */ +typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8]; + +/** \fn sha1_init(sha1_ctx_t *state) + * \brief initializes a SHA-1 context + * This function sets a ::sha1_ctx_t variable to the initialization vector + * for SHA-1 hashing. + * \param state pointer to the SHA-1 context variable + */ +void sha1_init(sha1_ctx_t *state); + +/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block) + * \brief process one input block + * This function processes one input block and updates the hash context + * accordingly + * \param state pointer to the state variable to update + * \param block pointer to the message block to process + */ +void sha1_nextBlock (sha1_ctx_t *state, const void* block); + +/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b) + * \brief processes the given block and finalizes the context + * This function processes the last block in a SHA-1 hashing process. + * The block should have a maximum length of a single input block. 
+ * \param state pointer to the state variable to update and finalize + * \param block pointer to themessage block to process + * \param length_b length of the message block in bits + */ +void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b); + +/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state) + * \brief convert a state variable into an actual hash value + * Writes the hash value corresponding to the state to the memory pointed by dest. + * \param dest pointer to the hash value destination + * \param state pointer to the hash context + */ +void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state); + +/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b) + * \brief hashing a message which in located entirely in RAM + * This function automatically hashes a message which is entirely in RAM with + * the SHA-1 hashing algorithm. + * \param dest pointer to the hash value destination + * \param msg pointer to the message which should be hashed + * \param length_b length of the message in bits + */ +void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b); + + + +#endif /*SHA1_H_*/ diff --git a/sha256/sha256-asm.S b/sha256/sha256-asm.S new file mode 100644 index 0000000..d9eb6b6 --- /dev/null +++ b/sha256/sha256-asm.S @@ -0,0 +1,1042 @@ +/* sha256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; sha-256 implementation in assembler +SHA256_BLOCK_BITS = 512 +SHA256_HASH_BITS = 256 + +.macro precall + /* push r18 - r27, r30 - r31*/ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +/* X points to Block */ +.macro dbg_hexdump length + precall + hexdump \length + postcall +.endm + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha256_ctx_t is: +; +; [h0][h1][h2][h3][h4][h5][h6][h7][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha256_ctx2hash +; === sha256_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha256_ctx structure +; given in r23,r22 +sha256_ctx2hash: + movw r26, r22 + movw r30, r24 + ldi r21, 8 + sbiw r26, 4 +1: + ldi r20, 4 + adiw r26, 8 +2: + ld r0, -X + st Z+, r0 + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha256 +; === sha256 === +; this function 
calculates SHA-256 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +; locals: a 40 byte context (8*4 state bytes + 8 length bytes) on the stack +; r11:r8 = remaining length in bits +; r13:r12 = pointer to the not yet hashed part of the message +; r17:r16 = pointer to the context +sha256: +sha256_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r16, SPL + in r17, SPH + subi r16, 8*4+8 + sbci r17, 0 + in r0, SREG + cli + out SPL, r16 + out SPH, r17 + out SREG, r0 + + push r25 + push r24 + /* SP addresses the next free byte, so the context starts one byte above it; + a real 16-bit increment is needed: inc does not update the carry flag, + so inc/adc loses the carry into the high byte when the low byte wraps */ + subi r16, lo8(-1) + sbci r17, hi8(-1) + + movw r8, r18 /* backup of length */ + movw r10, r20 + + movw r12, r22 /* backup of msg-ptr */ + + movw r24, r16 + rcall sha256_init + /* while length >= 512: hash one full block */ +1: + tst r11 + brne 2f /* bits 24..31 set: more than one block left */ + tst r10 + brne 2f /* bits 16..23 set: more than one block left */ + mov r19, r9 + cpi r19, 0x02 + brlo 4f /* less than 512 bits left: go pad and finish */ +2: + movw r24, r16 + movw r22, r12 + rcall sha256_nextBlock + /* msg-ptr += 64 bytes; r23:r22 do not survive the call and are reloaded + from r13:r12 on every pass, so the backup itself has to advance + (r1 is zero here, sha256_nextBlock clears it before returning) */ + ldi r19, 64 + add r12, r19 + adc r13, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 /* remaining length is below 512, it fits in 16 bits */ + rcall sha256_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha256_ctx2hash + +sha256_epilog: + in r30, SPL + in r31, SPH + adiw r30, 8*4+8 + in r0, SREG + cli + out SPL, r30 + out SPH, r31 + out SREG, r0 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha256_lastBlock +; === sha256_lastBlock === +; this function does padding & Co. 
for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: a 16-bit pointer to the data to hash +; given in r23,r22 +; param3: a 16-bit integer specifying the length of the data in bits +; given in r21,r20 +; while 512 or more bits are left, full blocks are handed to +; sha256_nextBlock; the remainder is copied to the stack and padded +sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1) + + +sha256_lastBlock: + cpi r21, 0x02 + brlo sha256_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 0x02 /* length -= 512 bits */ + subi r22, lo8(-64) /* data pointer += 64 bytes, the size of the block just hashed */ + sbci r23, hi8(-64) + rjmp sha256_lastBlock +sha256_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r1, SREG + subi r30, lo8(64) + sbci r31, hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* length is below 512 here, so bit 8 of the bit count is the only bit left in r21 ... */ + bld r18, 5 /* ... and it is bit 5 of the byte count; now: r18 == length/8 (aka. 
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha256_lastBlock_post_copy + mov r1, r18 +sha256_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha256_lastBlock_copy_loop +sha256_lastBlock_post_copy: +sha256_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha256_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*8+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha256_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha256_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha256_lastBlock_epilog +sha256_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 8*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha256_nextBlock + +sha256_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r1, SREG + adiw 
r30, 63 ; lo8(64) + adiw r30, 1 ; hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + clr r1 + clr r0 + ret + +/**/ +;########################################################### + +.global sha256_nextBlock +; === sha256_nextBlock === +; this is the core function for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte) + +Bck1 = 12 +Bck2 = 13 +Bck3 = 14 +Bck4 = 15 +Func1 = 22 +Func2 = 23 +Func3 = 24 +Func4 = 25 +Accu1 = 16 +Accu2 = 17 +Accu3 = 18 +Accu4 = 19 +XAccu1 = 8 +XAccu2 = 9 +XAccu3 = 10 +XAccu4 = 11 +T1 = 4 +T2 = 5 +T3 = 6 +T4 = 7 +LoopC = 1 +/* byteorder: high number <--> high significance */ +sha256_nextBlock: + ; initial, let's make some space ready for local vars + push r4 /* replace push & pop by mem ops? 
*/ + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack + movw r30, r22 ; Z points to message + subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha256_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + push r18 + push r19 + push r24 + push r25 /* param1 will be needed later */ + ; now we fill the w array with message (think about endianess) + adiw r26, 1 ; X++ + ldi r20, 16 +sha256_nextBlock_wcpyloop: + ld r23, Z+ + ld r22, Z+ + ld r19, Z+ + ld r18, Z+ + st X+, r18 + st X+, r19 + st X+, r22 + st X+, r23 + dec r20 + brne sha256_nextBlock_wcpyloop +/* for (i=16; i<64; ++i){ + w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; + } */ + /* r25,r24,r23,r24 (r21,r20) are function values + r19,r18,r17,r16 are the accumulator + r15,r14,r13,rBck1 are backup1 + r11,r10,r9 ,r8 are xor accu + r1 is round counter */ + + ldi r20, 64-16 + mov LoopC, r20 +sha256_nextBlock_wcalcloop: + movw r30, r26 ; cp X to Z + sbiw r30, 63 + sbiw r30, 1 ; substract 64 = 16*4 + ld Accu1, Z+ + ld Accu2, Z+ + ld Accu3, Z+ + ld Accu4, Z+ /* w[i] = w[i-16] */ + ld Bck1, Z+ + ld Bck2, Z+ + ld Bck3, Z+ + ld Bck4, Z+ /* backup = w[i-15] */ + /* now sigma 0 */ + mov Func1, Bck2 + mov Func2, Bck3 + mov Func3, Bck4 + mov Func4, Bck1 /* prerotated by 8 */ + ldi r20, 1 + rcall bitrotl + movw XAccu1, Func1 + movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/ 
+sigma0_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + ror Bck1 + dec Func2 + brne sigma0_shr + eor XAccu1, Bck1 + eor XAccu2, Bck2 + eor XAccu3, Bck3 + eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + ldd Func1, Z+7*4 /* now accu += w[i-7] */ + ldd Func2, Z+7*4+1 + ldd Func3, Z+7*4+2 + ldd Func4, Z+7*4+3 + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + ldd Bck1, Z+12*4 /* now backup = w[i-2]*/ + ldd Bck2, Z+12*4+1 + ldd Bck3, Z+12*4+2 + ldd Bck4, Z+12*4+3 + /* now sigma 1 */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 1 + rcall bitrotr + movw XAccu3, Func3 + movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */ +; movw Func1, Bck3 +; movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/ +sigma1_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + dec Func2 + brne sigma1_shr + eor XAccu1, Bck2 + eor XAccu2, Bck3 + eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + /* now let's store the shit */ + st X+, Accu1 + st X+, Accu2 + st X+, Accu3 + st X+, Accu4 + dec LoopC + breq 3f ; skip if zero + rjmp sha256_nextBlock_wcalcloop +3: + /* we are finished with w array X points one byte post w */ +/* init a array */ + pop r31 + pop r30 + push r30 + push r31 + ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */ +init_a_array: + ld r1, Z+ + st X+, r1 + dec r25 + brne init_a_array + +/* now the real fun begins */ +/* for (i=0; i<64; ++i){ + t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; + t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); + 
memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; + a[4] += t1; + a[0] = t1 + t2; + } */ + /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */ + sbiw r26, 8*4 /* X still points at a[7]+1*/ + movw r28, r26 + ldi r30, lo8(sha256_kv) + ldi r31, hi8(sha256_kv) + dec r27 /* X - (64*4 == 256) */ + ldi r25, 64 + mov LoopC, r25 +sha256_main_loop: + /* now calculate t1 */ + /*CH(x,y,z) = (x&y)^((~x)&z)*/ + ldd T1, Y+5*4 + ldd T2, Y+5*4+1 + ldd T3, Y+5*4+2 + ldd T4, Y+5*4+3 /* y in T */ + ldd Func1, Y+4*4 + ldd Func2, Y+4*4+1 + ldd Func3, Y+4*4+2 + ldd Func4, Y+4*4+3 /* x in Func */ + ldd Bck1, Y+6*4 + ldd Bck2, Y+6*4+1 + ldd Bck3, Y+6*4+2 + ldd Bck4, Y+6*4+3 /* z in Bck */ + and T1, Func1 + and T2, Func2 + and T3, Func3 + and T4, Func4 + com Func1 + com Func2 + com Func3 + com Func4 + and Bck1, Func1 + and Bck2, Func2 + and Bck3, Func3 + and Bck4, Func4 + eor T1, Bck1 + eor T2, Bck2 + eor T3, Bck3 + eor T4, Bck4 /* done, CH(x,y,z) is in T */ + /* now SIGMA1(a[4]) */ + ldd Bck4, Y+4*4 /* think about using it from Func reg above*/ + ldd Bck1, Y+4*4+1 + ldd Bck2, Y+4*4+2 + ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */ + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotl /* rotr(x,6) */ + movw XAccu1, Func1 + movw XAccu3, Func3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 3 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + movw Func1, Bck3 /* this prerotates furteh 16 bits*/ + movw Func3, Bck1 /* so we have now prerotated by 24 bits*/ + ldi r20, 1 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* finished with SIGMA1, add it to T */ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 + /* now we've to add a[7], w[i] and k[i] */ + ldd XAccu1, Y+4*7 + ldd XAccu2, Y+4*7+1 + ldd XAccu3, Y+4*7+2 + ldd XAccu4, Y+4*7+3 + add T1, 
XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add a[7] */ + ld XAccu1, X+ + ld XAccu2, X+ + ld XAccu3, X+ + ld XAccu4, X+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add w[i] */ + lpm XAccu1, Z+ + lpm XAccu2, Z+ + lpm XAccu3, Z+ + lpm XAccu4, Z+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add k[i] */ /* finished with t1 */ + /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/ + /* starting with MAJ(x,y,z) */ + ldd Func1, Y+4*0+0 + ldd Func2, Y+4*0+1 + ldd Func3, Y+4*0+2 + ldd Func4, Y+4*0+3 /* load x=a[0] */ + ldd XAccu1, Y+4*1+0 + ldd XAccu2, Y+4*1+1 + ldd XAccu3, Y+4*1+2 + ldd XAccu4, Y+4*1+3 /* load y=a[1] */ + and XAccu1, Func1 + and XAccu2, Func2 + and XAccu3, Func3 + and XAccu4, Func4 /* XAccu == (x & y) */ + ldd Bck1, Y+4*2+0 + ldd Bck2, Y+4*2+1 + ldd Bck3, Y+4*2+2 + ldd Bck4, Y+4*2+3 /* load z=a[2] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */ + ldd Func1, Y+4*1+0 + ldd Func2, Y+4*1+1 + ldd Func3, Y+4*1+2 + ldd Func4, Y+4*1+3 /* load y=a[1] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */ + /* SIGMA0(a[0]) */ + ldd Bck1, Y+4*0+0 /* we should combine this with above */ + ldd Bck2, Y+4*0+1 + ldd Bck3, Y+4*0+2 + ldd Bck4, Y+4*0+3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotr + movw Accu1, Func1 + movw Accu3, Func3 /* Accu = shr(a[0], 2) */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotate by 16 bits */ + ldi r20, 3 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */ + mov Func1, Bck4 + mov Func2, Bck1 + mov Func3, Bck2 + mov Func4, Bck3 /* prerotate by 24 
bits */ + ldi r20, 2 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */ + add Accu1, XAccu1 /* add previous result (MAJ)*/ + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 + /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/ + /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + + ldi r21, 7*4 + adiw r28, 7*4 +a_shift_loop: + ld r25, -Y /* warning: this is PREdecrement */ + std Y+4, r25 + dec r21 + brne a_shift_loop + + ldd Bck1, Y+4*4+0 + ldd Bck2, Y+4*4+1 + ldd Bck3, Y+4*4+2 + ldd Bck4, Y+4*4+3 + add Bck1, T1 + adc Bck2, T2 + adc Bck3, T3 + adc Bck4, T4 + std Y+4*4+0, Bck1 + std Y+4*4+1, Bck2 + std Y+4*4+2, Bck3 + std Y+4*4+3, Bck4 + add Accu1, T1 + adc Accu2, T2 + adc Accu3, T3 + adc Accu4, T4 + std Y+4*0+0, Accu1 + std Y+4*0+1, Accu2 + std Y+4*0+2, Accu3 + std Y+4*0+3, Accu4 /* a array updated */ + + + dec LoopC + breq update_state + rjmp sha256_main_loop ;brne sha256_main_loop +update_state: + /* update state */ + /* pointers to state should still exist on the stack ;-) */ + pop r31 + pop r30 + ldi r21, 8 +update_state_loop: + ldd Accu1, Z+0 + ldd Accu2, Z+1 + ldd Accu3, Z+2 + ldd Accu4, Z+3 + ld Func1, Y+ + ld Func2, Y+ + ld Func3, Y+ + ld Func4, Y+ + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + st Z+, Accu1 + st Z+, Accu2 + st Z+, Accu3 + st Z+, Accu4 + dec r21 + brne update_state_loop + /* now we just have to update the length */ + adiw r30, 1 /* since we add 512, we can simply skip the LSB */ + ldi r21, 2 + ldi r22, 6 + ld r20, Z + add r20, r21 + st Z+, r20 + clr r21 +sha256_nextBlock_fix_length: + brcc sha256_nextBlock_epilog + ld r20, Z + adc r20, r21 + st Z+, r20 + dec r22 + brne sha256_nextBlock_fix_length + +; EPILOG +sha256_nextBlock_epilog: +/* now we should clean up the stack */ + + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out 
SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + ret + +sha256_kv: ; round-key-vector stored in ProgMem +.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c +.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b +.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9 +.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429 +.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272 +.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a +.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e +.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671 + + +;########################################################### + +.global sha256_init +;uint32_t sha256_init_vector[]={ +; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, +; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +; +;void sha256_init(sha256_ctx_t *state){ +; state->length=0; +; memcpy(state->h, sha256_init_vector, 8*4); +;} +; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram +; modifys: Z(r30,r31), Func1, r22 +sha256_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha256_init_vector)) + ldi r31, hi8((sha256_init_vector)) + ldi r22, 32+8 +sha256_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne 
sha256_init_vloop + ret + +sha256_init_vector: +.word 0xE667, 0x6A09 +.word 0xAE85, 0xBB67 +.word 0xF372, 0x3C6E +.word 0xF53A, 0xA54F +.word 0x527F, 0x510E +.word 0x688C, 0x9B05 +.word 0xD9AB, 0x1F83 +.word 0xCD19, 0x5BE0 +.word 0x0000, 0x0000 +.word 0x0000, 0x0000 + +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + rjmp bitrotl_loop +fixrotl: + or r22, r21 + ret + + +;########################################################### + +.global rotr32 +; === ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + rjmp bitrotr_loop +fixrotr: + or r25, r21 + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianess of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,22 (r25 is most significant) +; modifys: r21, r22 +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + mov r23, r24 + mov r24, r21 + mov r25, r20 + ret + diff --git a/sha256.c 
b/sha256/sha256.c similarity index 100% rename from sha256.c rename to sha256/sha256.c diff --git a/sha256/sha256.h b/sha256/sha256.h new file mode 100644 index 0000000..24960a3 --- /dev/null +++ b/sha256/sha256.h @@ -0,0 +1,122 @@ +/* sha256.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file sha256.h + * \author Daniel Otte + * \date 2006-05-16 + * \license GPLv3 or later + * + */ + +#ifndef SHA256_H_ +#define SHA256_H_ + +#define __LITTLE_ENDIAN__ + + +#include <stdint.h> + +/** \def SHA256_HASH_BITS + * defines the size of a SHA-256 hash value in bits + */ + +/** \def SHA256_HASH_BYTES + * defines the size of a SHA-256 hash value in bytes + */ + +/** \def SHA256_BLOCK_BITS + * defines the size of a SHA-256 input block in bits + */ + +/** \def SHA256_BLOCK_BYTES + * defines the size of a SHA-256 input block in bytes + */ + +#define SHA256_HASH_BITS 256 +#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_BLOCK_BITS 512 +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) + +/** \typedef sha256_ctx_t + * \brief SHA-256 context type + * + * A variable of this type may hold the state of a SHA-256 hashing process + */ +typedef struct { + uint32_t h[8]; + uint64_t length; +} sha256_ctx_t; + +/** \typedef sha256_hash_t + * \brief SHA-256 hash value type + * + * A variable of this type may hold the hash
value produced by the + * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function. + */ +typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES]; + +/** \fn void sha256_init(sha256_ctx_t *state) + * \brief initialise a SHA-256 context + * + * This function sets a ::sha256_ctx_t to the initial values for hashing. + * \param state pointer to the SHA-256 hashing context + */ +void sha256_init(sha256_ctx_t *state); + +/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block) + * \brief update the context with a given block + * + * This function updates the SHA-256 hash context by processing the given block + * of fixed length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of fixed length (512 bit = 64 byte) + */ +void sha256_nextBlock (sha256_ctx_t* state, const void* block); + +/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b) + * \brief finalize the context with the given block + * + * This function finalizes the SHA-256 hash context by processing the given block + * of variable length.
+ * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of variable length (length_b bits) + * \param length_b the length of the block in bits + */ +void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b); + +/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) + * \brief convert the hash state into the hash value + * This function reads the context and writes the hash value to the destination + * \param dest pointer to the location where the hash value should be written + * \param state pointer to the SHA-256 hash context + */ +void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state); + +/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b) + * \brief simple SHA-256 hashing function for direct hashing + * + * This function automatically hashes a given message of arbitrary length with + * the SHA-256 hashing algorithm. + * \param dest pointer to the location where the hash value is going to be written to + * \param msg pointer to the message that is going to be hashed + * \param length_b length of the message in bits + */ +void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b); + +#endif /*SHA256_H_*/ diff --git a/shabea/memxor.S b/shabea/memxor.S new file mode 100644 index 0000000..a32058b --- /dev/null +++ b/shabea/memxor.S @@ -0,0 +1,66 @@ +/* memxor.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + * File: memxor.S + * Author: Daniel Otte + * Date: 2008-08-07 + * License: GPLv3 or later + * Description: memxor, XORing one block into another + * + */ + +/* + * void memxor(void* dest, const void* src, uint16_t n); + */ + /* + * param dest is passed in r24:r25 + * param src is passed in r22:r23 + * param n is passed in r20:r21 + */ +.global memxor +memxor: + movw r30, r24 /* Z = dest */ + movw r26, r22 /* X = src */ + movw r24, r20 /* r25:r24 = n, used as byte counter */ + adiw r24, 0 /* adds nothing, only sets the zero flag when n == 0 */ + breq 2f /* nothing to do for n == 0 */ +1: + ld r20, X+ + ld r21, Z + eor r20, r21 + st Z+, r20 /* dest[i] ^= src[i] */ + sbiw r24, 1 + brne 1b +2: + ret + + + + + + + + + + + + + + diff --git a/shabea/memxor.h b/shabea/memxor.h new file mode 100644 index 0000000..a62a616 --- /dev/null +++ b/shabea/memxor.h @@ -0,0 +1,7 @@ +#ifndef MEMXOR_H_ +#define MEMXOR_H_ +#include <stdint.h> + +void memxor(void* dest, const void* src, uint16_t n); + +#endif diff --git a/shabea/sha256-asm.S b/shabea/sha256-asm.S new file mode 100644 index 0000000..d9eb6b6 --- /dev/null +++ b/shabea/sha256-asm.S @@ -0,0 +1,1042 @@ +/* sha256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ +/* + * Author: Daniel Otte + * + * License: GPLv3 or later +*/ +; sha-256 implementation in assembler +SHA256_BLOCK_BITS = 512 +SHA256_HASH_BITS = 256 + +.macro precall + /* save r0, r1, r18 - r27 and r30 - r31, then zero r1 */ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1 +.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +/* X points to Block; dumps it in chunks of at most 16 bytes with all caller-visible registers preserved */ +.macro dbg_hexdump length + precall + hexdump \length + postcall +.endm + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha256_ctx_t is: +; +; [h0][h1][h2][h3][h4][h5][h6][h7][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha256_ctx2hash +; === sha256_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha256_ctx structure +; given in r23,r22 +sha256_ctx2hash: + movw r26, r22 /* X = state */ + movw r30, r24 /* Z = dest */ + ldi r21, 8 /* eight 32-bit words h0..h7 */ + sbiw r26, 4 /* bias X so that the adiw below lands one past the current word */ +1: + ldi r20, 4 /* four bytes per word */ + adiw r26, 8 /* X = one past the last byte of the word to copy */ +2: + ld r0, -X /* walk the word from its last byte down to its first ... */ + st Z+, r0 /* ... while dest ascends: the byte order of each word is reversed */ + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha256 +; === sha256 === +; this function calculates SHA-256 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in
r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha256: +sha256_prolog: + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r16 + push r17 + in r16, SPL + in r17, SPH + subi r16, 8*4+8 /* reserve sizeof(sha256_ctx_t) = 40 bytes on the stack */ + sbci r17, 0 + in r0, SREG + cli + out SPL, r16 + out SPH, r17 + out SREG, r0 + + push r25 + push r24 + subi r16, lo8(-1) /* r17:r16 = address of ctx; 16-bit increment (inc leaves C untouched, so inc/adc lost the carry) */ + sbci r17, hi8(-1) + + movw r8, r18 /* backup of length (r11:r10:r9:r8) */ + movw r10, r20 + + movw r12, r22 /* backup of msg-ptr (r13:r12) */ + + movw r24, r16 + rcall sha256_init + /* while length >= 512 */ +1: + tst r11 + brne 2f /* length >= 2^24 bits: a full block is left */ + tst r10 + brne 2f /* length >= 2^16 bits: a full block is left */ + mov r19, r9 + cpi r19, 0x02 + brlo 4f /* fewer than 512 bits left: finish with lastBlock */ +2: + movw r24, r16 + movw r22, r12 + rcall sha256_nextBlock + ldi r19, 64 /* advance the saved msg-ptr by one block (64 bytes); r22/r23 are clobbered by nextBlock */ + add r12, r19 + adc r13, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 /* remaining length is below 512, so 16 bits suffice */ + rcall sha256_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha256_ctx2hash + +sha256_epilog: + in r30, SPL + in r31, SPH + adiw r30, 8*4+8 + in r0, SREG + cli + out SPL, r30 + out SPH, r31 + out SREG, r0 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha256_lastBlock +; === sha256_lastBlock === +; this function does padding & Co.
for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: a 16-bit pointer to the block to hash +; given in r23,r22 +; param3: a 16-bit integer specifying length of block in bits +; given in r21,r20 +sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1) + +sha256_lastBlock: + cpi r21, 0x02 /* length >= 512 bits: hash one full block first */ + brlo sha256_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r22 + pop r23 + pop r24 + pop r25 + subi r21, 0x02 /* length -= 512 bits */ + subi r22, lo8(-64) /* block pointer advances 64 bytes (was 512 bytes by mistake) */ + sbci r23, hi8(-64) + rjmp sha256_lastBlock +sha256_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r1, SREG + subi r30, lo8(64) + sbci r31, hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* bit 8 of the bit length ... */ + bld r18, 5 /* ... is bit 5 of the byte count; now: r18 == length/8 (aka.
length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha256_lastBlock_post_copy + mov r1, r18 +sha256_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha256_lastBlock_copy_loop +sha256_lastBlock_post_copy: +sha256_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha256_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha256_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*8+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not necessary: the loop above already counted r18 down to 0 */ + /* reset Z pointer to begin of block */ + +sha256_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha256_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha256_lastBlock_epilog +sha256_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 8*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st -Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha256_nextBlock + +sha256_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r1, SREG + adiw
r30, 63 ; lo8(64) + adiw r30, 1 ; hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + clr r1 + clr r0 + ret + +/**/ +;########################################################### + +.global sha256_nextBlock +; === sha256_nextBlock === +; this is the core function for calculating SHA-256 hashes +; param1: the 16-bit pointer to sha256_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte) + +Bck1 = 12 +Bck2 = 13 +Bck3 = 14 +Bck4 = 15 +Func1 = 22 +Func2 = 23 +Func3 = 24 +Func4 = 25 +Accu1 = 16 +Accu2 = 17 +Accu3 = 18 +Accu4 = 19 +XAccu1 = 8 +XAccu2 = 9 +XAccu3 = 10 +XAccu4 = 11 +T1 = 4 +T2 = 5 +T3 = 6 +T4 = 7 +LoopC = 1 +/* byteorder: high number <--> high significance */ +sha256_nextBlock: + ; initial, let's make some space ready for local vars + push r4 /* replace push & pop by mem ops?
*/ + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack + movw r30, r22 ; Z points to message + subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha256_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + push r18 + push r19 + push r24 + push r25 /* param1 will be needed later */ + ; now we fill the w array with message (think about endianess) + adiw r26, 1 ; X++ + ldi r20, 16 +sha256_nextBlock_wcpyloop: + ld r23, Z+ + ld r22, Z+ + ld r19, Z+ + ld r18, Z+ + st X+, r18 + st X+, r19 + st X+, r22 + st X+, r23 + dec r20 + brne sha256_nextBlock_wcpyloop +/* for (i=16; i<64; ++i){ + w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; + } */ + /* r25,r24,r23,r24 (r21,r20) are function values + r19,r18,r17,r16 are the accumulator + r15,r14,r13,rBck1 are backup1 + r11,r10,r9 ,r8 are xor accu + r1 is round counter */ + + ldi r20, 64-16 + mov LoopC, r20 +sha256_nextBlock_wcalcloop: + movw r30, r26 ; cp X to Z + sbiw r30, 63 + sbiw r30, 1 ; substract 64 = 16*4 + ld Accu1, Z+ + ld Accu2, Z+ + ld Accu3, Z+ + ld Accu4, Z+ /* w[i] = w[i-16] */ + ld Bck1, Z+ + ld Bck2, Z+ + ld Bck3, Z+ + ld Bck4, Z+ /* backup = w[i-15] */ + /* now sigma 0 */ + mov Func1, Bck2 + mov Func2, Bck3 + mov Func3, Bck4 + mov Func4, Bck1 /* prerotated by 8 */ + ldi r20, 1 + rcall bitrotl + movw XAccu1, Func1 + movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/ 
+sigma0_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + ror Bck1 + dec Func2 + brne sigma0_shr + eor XAccu1, Bck1 + eor XAccu2, Bck2 + eor XAccu3, Bck3 + eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + ldd Func1, Z+7*4 /* now accu += w[i-7] */ + ldd Func2, Z+7*4+1 + ldd Func3, Z+7*4+2 + ldd Func4, Z+7*4+3 + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + ldd Bck1, Z+12*4 /* now backup = w[i-2]*/ + ldd Bck2, Z+12*4+1 + ldd Bck3, Z+12*4+2 + ldd Bck4, Z+12*4+3 + /* now sigma 1 */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 1 + rcall bitrotr + movw XAccu3, Func3 + movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */ +; movw Func1, Bck3 +; movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/ +sigma1_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + dec Func2 + brne sigma1_shr + eor XAccu1, Bck2 + eor XAccu2, Bck3 + eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + /* now let's store the shit */ + st X+, Accu1 + st X+, Accu2 + st X+, Accu3 + st X+, Accu4 + dec LoopC + breq 3f ; skip if zero + rjmp sha256_nextBlock_wcalcloop +3: + /* we are finished with w array X points one byte post w */ +/* init a array */ + pop r31 + pop r30 + push r30 + push r31 + ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */ +init_a_array: + ld r1, Z+ + st X+, r1 + dec r25 + brne init_a_array + +/* now the real fun begins */ +/* for (i=0; i<64; ++i){ + t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; + t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); + 
memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; + a[4] += t1; + a[0] = t1 + t2; + } */ + /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */ + sbiw r26, 8*4 /* X still points at a[7]+1*/ + movw r28, r26 + ldi r30, lo8(sha256_kv) + ldi r31, hi8(sha256_kv) + dec r27 /* X - (64*4 == 256) */ + ldi r25, 64 + mov LoopC, r25 +sha256_main_loop: + /* now calculate t1 */ + /*CH(x,y,z) = (x&y)^((~x)&z)*/ + ldd T1, Y+5*4 + ldd T2, Y+5*4+1 + ldd T3, Y+5*4+2 + ldd T4, Y+5*4+3 /* y in T */ + ldd Func1, Y+4*4 + ldd Func2, Y+4*4+1 + ldd Func3, Y+4*4+2 + ldd Func4, Y+4*4+3 /* x in Func */ + ldd Bck1, Y+6*4 + ldd Bck2, Y+6*4+1 + ldd Bck3, Y+6*4+2 + ldd Bck4, Y+6*4+3 /* z in Bck */ + and T1, Func1 + and T2, Func2 + and T3, Func3 + and T4, Func4 + com Func1 + com Func2 + com Func3 + com Func4 + and Bck1, Func1 + and Bck2, Func2 + and Bck3, Func3 + and Bck4, Func4 + eor T1, Bck1 + eor T2, Bck2 + eor T3, Bck3 + eor T4, Bck4 /* done, CH(x,y,z) is in T */ + /* now SIGMA1(a[4]) */ + ldd Bck4, Y+4*4 /* think about using it from Func reg above*/ + ldd Bck1, Y+4*4+1 + ldd Bck2, Y+4*4+2 + ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */ + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotl /* rotr(x,6) */ + movw XAccu1, Func1 + movw XAccu3, Func3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 3 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + movw Func1, Bck3 /* this prerotates furteh 16 bits*/ + movw Func3, Bck1 /* so we have now prerotated by 24 bits*/ + ldi r20, 1 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* finished with SIGMA1, add it to T */ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 + /* now we've to add a[7], w[i] and k[i] */ + ldd XAccu1, Y+4*7 + ldd XAccu2, Y+4*7+1 + ldd XAccu3, Y+4*7+2 + ldd XAccu4, Y+4*7+3 + add T1, 
XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add a[7] */ + ld XAccu1, X+ + ld XAccu2, X+ + ld XAccu3, X+ + ld XAccu4, X+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add w[i] */ + lpm XAccu1, Z+ + lpm XAccu2, Z+ + lpm XAccu3, Z+ + lpm XAccu4, Z+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add k[i] */ /* finished with t1 */ + /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/ + /* starting with MAJ(x,y,z) */ + ldd Func1, Y+4*0+0 + ldd Func2, Y+4*0+1 + ldd Func3, Y+4*0+2 + ldd Func4, Y+4*0+3 /* load x=a[0] */ + ldd XAccu1, Y+4*1+0 + ldd XAccu2, Y+4*1+1 + ldd XAccu3, Y+4*1+2 + ldd XAccu4, Y+4*1+3 /* load y=a[1] */ + and XAccu1, Func1 + and XAccu2, Func2 + and XAccu3, Func3 + and XAccu4, Func4 /* XAccu == (x & y) */ + ldd Bck1, Y+4*2+0 + ldd Bck2, Y+4*2+1 + ldd Bck3, Y+4*2+2 + ldd Bck4, Y+4*2+3 /* load z=a[2] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */ + ldd Func1, Y+4*1+0 + ldd Func2, Y+4*1+1 + ldd Func3, Y+4*1+2 + ldd Func4, Y+4*1+3 /* load y=a[1] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */ + /* SIGMA0(a[0]) */ + ldd Bck1, Y+4*0+0 /* we should combine this with above */ + ldd Bck2, Y+4*0+1 + ldd Bck3, Y+4*0+2 + ldd Bck4, Y+4*0+3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotr + movw Accu1, Func1 + movw Accu3, Func3 /* Accu = shr(a[0], 2) */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotate by 16 bits */ + ldi r20, 3 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */ + mov Func1, Bck4 + mov Func2, Bck1 + mov Func3, Bck2 + mov Func4, Bck3 /* prerotate by 24 
bits */ + ldi r20, 2 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */ + add Accu1, XAccu1 /* add previous result (MAJ)*/ + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 + /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/ + /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + + ldi r21, 7*4 + adiw r28, 7*4 +a_shift_loop: + ld r25, -Y /* warning: this is PREdecrement */ + std Y+4, r25 + dec r21 + brne a_shift_loop + + ldd Bck1, Y+4*4+0 + ldd Bck2, Y+4*4+1 + ldd Bck3, Y+4*4+2 + ldd Bck4, Y+4*4+3 + add Bck1, T1 + adc Bck2, T2 + adc Bck3, T3 + adc Bck4, T4 + std Y+4*4+0, Bck1 + std Y+4*4+1, Bck2 + std Y+4*4+2, Bck3 + std Y+4*4+3, Bck4 + add Accu1, T1 + adc Accu2, T2 + adc Accu3, T3 + adc Accu4, T4 + std Y+4*0+0, Accu1 + std Y+4*0+1, Accu2 + std Y+4*0+2, Accu3 + std Y+4*0+3, Accu4 /* a array updated */ + + + dec LoopC + breq update_state + rjmp sha256_main_loop ;brne sha256_main_loop +update_state: + /* update state */ + /* pointers to state should still exist on the stack ;-) */ + pop r31 + pop r30 + ldi r21, 8 +update_state_loop: + ldd Accu1, Z+0 + ldd Accu2, Z+1 + ldd Accu3, Z+2 + ldd Accu4, Z+3 + ld Func1, Y+ + ld Func2, Y+ + ld Func3, Y+ + ld Func4, Y+ + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + st Z+, Accu1 + st Z+, Accu2 + st Z+, Accu3 + st Z+, Accu4 + dec r21 + brne update_state_loop + /* now we just have to update the length */ + adiw r30, 1 /* since we add 512, we can simply skip the LSB */ + ldi r21, 2 + ldi r22, 6 + ld r20, Z + add r20, r21 + st Z+, r20 + clr r21 +sha256_nextBlock_fix_length: + brcc sha256_nextBlock_epilog + ld r20, Z + adc r20, r21 + st Z+, r20 + dec r22 + brne sha256_nextBlock_fix_length + +; EPILOG +sha256_nextBlock_epilog: +/* now we should clean up the stack */ + + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out 
SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + ret + +sha256_kv: ; round-key-vector stored in ProgMem +.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c +.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b +.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9 +.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429 +.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272 +.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a +.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e +.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671 + + +;########################################################### + +.global sha256_init +;uint32_t sha256_init_vector[]={ +; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, +; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +; +;void sha256_init(sha256_ctx_t *state){ +; state->length=0; +; memcpy(state->h, sha256_init_vector, 8*4); +;} +; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram +; modifys: Z(r30,r31), Func1, r22 +sha256_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha256_init_vector)) + ldi r31, hi8((sha256_init_vector)) + ldi r22, 32+8 +sha256_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne 
sha256_init_vloop + ret + +sha256_init_vector: +.word 0xE667, 0x6A09 +.word 0xAE85, 0xBB67 +.word 0xF372, 0x3C6E +.word 0xF53A, 0xA54F +.word 0x527F, 0x510E +.word 0x688C, 0x9B05 +.word 0xD9AB, 0x1F83 +.word 0xCD19, 0x5BE0 +.word 0x0000, 0x0000 +.word 0x0000, 0x0000 + +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + rjmp bitrotl_loop +fixrotl: + or r22, r21 + ret + + +;########################################################### + +.global rotr32 +; === ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + rjmp bitrotr_loop +fixrotr: + or r25, r21 + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianess of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,22 (r25 is most significant) +; modifys: r21, r22 +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + mov r23, r24 + mov r24, r21 + mov r25, r20 + ret + diff --git 
a/shabea/sha256.h b/shabea/sha256.h new file mode 100644 index 0000000..24960a3 --- /dev/null +++ b/shabea/sha256.h @@ -0,0 +1,122 @@ +/* sha256.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file sha256.h + * \author Daniel Otte + * \date 2006-05-16 + * \license GPLv3 or later + * + */ + +#ifndef SHA256_H_ +#define SHA256_H_ + +#define __LITTLE_ENDIAN__ + + +#include <stdint.h> + +/** \def SHA256_HASH_BITS + * defines the size of a SHA-256 hash value in bits + */ + +/** \def SHA256_HASH_BYTES + * defines the size of a SHA-256 hash value in bytes + */ + +/** \def SHA256_BLOCK_BITS + * defines the size of a SHA-256 input block in bits + */ + +/** \def SHA256_BLOCK_BYTES + * defines the size of a SHA-256 input block in bytes + */ + +#define SHA256_HASH_BITS 256 +#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_BLOCK_BITS 512 +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) + +/** \typedef sha256_ctx_t + * \brief SHA-256 context type + * + * A variable of this type may hold the state of a SHA-256 hashing process + */ +typedef struct { + uint32_t h[8]; + uint64_t length; +} sha256_ctx_t; + +/** \typedef sha256_hash_t + * \brief SHA-256 hash value type + * + * A variable of this type may hold the hash value produced by the + * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */ +typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES]; + +/** \fn void sha256_init(sha256_ctx_t *state) + * \brief initialise a SHA-256 context + * + * This function sets a ::sha256_ctx_t to the initial values for hashing. + * \param state pointer to the SHA-256 hashing context + */ +void sha256_init(sha256_ctx_t *state); + +/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block) + * \brief update the context with a given block + * + * This function updates the SHA-256 hash context by processing the given block + * of fixed length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of fixed length (512 bit = 64 byte) + */ +void sha256_nextBlock (sha256_ctx_t* state, const void* block); + +/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b) + * \brief finalize the context with the given block + * + * This function finalizes the SHA-256 hash context by processing the given block + * of variable length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of variable length (length_b bits) + * \param length_b the length of the block in bits + */ +void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b); + +/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) + * \brief convert the hash state into the hash value + * This function reads the context and writes the hash value to the destination + * \param dest pointer to the location where the hash value should be written + * \param state pointer to the SHA-256 hash context + */ +void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state); + +/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b) + * \brief simple SHA-256 hashing function for direct hashing + * + * This function automatically hashes a given message of arbitrary length with + * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to + * \param msg pointer to the message that is going to be hashed + * \param length_b length of the message in bits + */ +void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b); + +#endif /*SHA256_H_*/ diff --git a/shabea.c b/shabea/shabea.c similarity index 100% rename from shabea.c rename to shabea/shabea.c diff --git a/shabea.h b/shabea/shabea.h similarity index 100% rename from shabea.h rename to shabea/shabea.h diff --git a/shacal1/sha1-asm.S b/shacal1/sha1-asm.S new file mode 100644 index 0000000..f571685 --- /dev/null +++ b/shacal1/sha1-asm.S @@ -0,0 +1,886 @@ +/* sha1-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS  = 160
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31, then clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall
+	pop r31		/* restores what precall pushed, in reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X (pointer to the data) across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* first argument of uart_hexdump: the pointer from X */
+.if \length > 16
+	ldi r22, lo8(16)	/* more than 16 bytes: pass 16 as second argument ... */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16	/* ... and expand the macro again for the rest */
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/*
+	push r0
+	push r1
+	clr r0
+1:	clr r1
+2:	dec r1
+	brne 2b
+	dec r0
+	brne 1b
+	pop r1
+	pop r0  // */
+.endm
+
+/* X points to Block; the body is commented out, so this macro expands to nothing */
+.macro dbg_hexdump length
+/*
+	precall
+	hexdump \length
+	postcall
+	// */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha1_ctx structure
+;	given in r23,r22
+sha1_ctx2hash:
+	movw r26, r22	; X = state
+	movw r30, r24	; Z = destination
+	ldi r21, 5	; five 32-bit words h0..h4
+	sbiw r26, 4	; bias X so that the adiw below lands one past the current word
+1:
+	ldi r20, 4
+	adiw r26, 8	; X = one past the end of the next word
+2:
+	ld r0, -X	; read the word backwards ...
+	st Z+, r0	; ... and write it forwards: the bytes of each word are reversed
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################
+
+.global sha1
+; === sha1 ===
+; 
this function calculates SHA-1 hashes from messages in RAM (the context lives on the stack)
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 5*4+8	/* reserve sizeof(sha1_ctx_t) = 28 bytes on the stack */
+	sbci r17, 0
+	in r0, SREG
+	cli		/* SP must not be used by an interrupt while half updated */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25	/* keep the destination pointer for sha1_ctx2hash */
+	push r24
+	subi r16, lo8(-(1))	/* r17:r16 = SP+1 = address of the context; 16-bit add, */
+	sbci r17, hi8(-(1))	/* because inc does not produce a carry for the high byte */
+
+	movw r8, r18 /* backup of length: r11:r10:r9:r8 */
+	movw r10, r20
+
+	movw r12, r22	/* backup of msg-ptr in r13:r12 (call-saved) */
+
+	movw r24, r16
+	rcall sha1_init
+		/* while length >= 512: hash one full block */
+1:
+	tst r11		/* any of the two upper length bytes set: surely >= 512 bits */
+	brne 2f
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02	/* otherwise compare bits 8..15 against 512/256 */
+	brlo 4f		/* fewer than 512 bits left: finish with sha1_lastBlock */
+
+2:	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64	/* msg-ptr += 64 bytes, in the call-saved copy that is */
+	add r12, r19	/* reloaded at the top of the loop */
+	adc r13, r1	/* relies on r1 == 0, like the length update below */
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8	/* remaining length, now below 512 bits, fits into 16 bit */
+	rcall sha1_lastBlock
+
+	pop r24		/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash
+
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8	/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. 
for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to the remaining message bytes (full 64 byte blocks are hashed first)
+;	given in r23,r22
+;  param3: an 16-bit integer specifing length of block in bits
+;	given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+sha1_lastBlock:
+	cpi r21, 0x02	/* 512 bits or more left? */
+	brlo sha1_lastBlock_prolog
+	push r25	/* yes: hash one full block, keeping all three parameters */
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 2		/* length -= 512 bits */
+	subi r22, lo8(-(64))	/* block pointer += 64 bytes (one block), */
+	sbci r23, hi8(-(64))	/* not 512 bytes as "subi r23, -2" did */
+	rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 temporarily holds SREG; it is zeroed again before use as zero */
+	subi r30, lo8(64)
+	sbci r31, hi8(64) /* ??? */
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the length is bit 5 of length/8 */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha1_lastBlock_post_copy
+	mov r1, r18	/* r1 is the copy counter and ends up zero */
+sha1_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:
+sha1_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18		/* r18 = number of bytes used in the block so far */
+
+/* checking stuff here */
+	cpi r18, 64-8+1	/* no room left for the 8 byte length field? */
+	brsh 0f
+	rjmp sha1_lastBlock_insert_zeros
+0:
+	/* oh shit, we landed here */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30, 1	/* Z back at the begin of the block */
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+		/* now we should subtract 512 from length */
+	movw r26, r24
+	adiw r26, 4*5+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0	/* propagate the borrow; dec below keeps the carry flag */
+	st X+, r19
+	dec r18
+	brne 1b
+
+;	clr r18 /* not neccessary ;-) */
+	/* reset Z pointer to begin of block */
+
+sha1_lastBlock_insert_zeros:
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha1_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 5*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + bits of this block, */
+	st -Z, r0	/* stored most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8	/* Z back at the begin of the block */
+	movw r22, r30
+	rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30, 1  ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (84 byte) plus one spare word (total 88 byte)
+
+xtmp = 0
+xNULL = 1
+W1 = 10
+W2 = 11
+T1 = 12
+T2 = 13
+T3 = 14
+T4 = 15
+LoopC = 16
+S = 17
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; initial, let's make some space ready for local vars
+ 			 /* replace push & pop by mem ops? */
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack /* maybe removeable? 
*/
+	movw r30, r22	; Z points to message
+	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha1_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+
+	push r18
+	push r19 /* push old SP on new stack */
+	push r24
+	push r25 /* param1 will be needed later */
+
+	/* load a[] with state */
+	movw 28, r24 /* load pointer to state in Y */
+	adiw r26, 1 ; X++
+
+	ldi LoopC, 5*4
+1:	ld tmp1, Y+
+	st X+, tmp1
+	dec LoopC
+	brne 1b
+
+	movw W1, r26 /* save pointer to w[0] */
+	/* load w[] with endian fixed message */
+		/* we might also use the changeendian32() function at bottom */
+	movw r30, r22 /* mv param2 (ponter to msg) to Z */
+	ldi LoopC, 16
+1:
+	ldd tmp1, Z+3	/* bytes of each message word are stored in reverse order */
+	st X+, tmp1
+	ldd tmp1, Z+2
+	st X+, tmp1
+	ldd tmp1, Z+1
+	st X+, tmp1
+	ld tmp1, Z
+	st X+, tmp1
+	adiw r30, 4
+	dec LoopC
+	brne 1b
+
+	;clr LoopC /* LoopC is named t in FIPS 180-2; it is already 0 after the loop above */
+	clr xtmp
+sha1_nextBlock_mainloop:
+	mov S, LoopC
+	lsl S
+	lsl S
+	andi S, 0x3C /* S is a bytepointer so *4 */
+	/* load w[s] */
+	movw r26, W1
+	add r26, S /* X points at w[s] */
+	adc r27, xNULL
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	ld T4, X+
+
+	/* debug output of w[s]; dbg_hexdump expands to nothing */
+	push r26
+	push r27
+	push T4
+	push T3
+	push T2
+	push T1
+	in r26, SPL
+	in r27, SPH
+	adiw r26, 1
+	dbg_hexdump 4
+	pop T1
+	pop T2
+	pop T3
+	pop T4
+	pop r27
+	pop r26
+	/**/
+
+	cpi LoopC, 16
+	brlt sha1_nextBlock_mainloop_core
+	/* update w[s]: xor in w[s+2], w[s+8] and w[s+13] (indices modulo 16) */
+	ldi tmp1, 2*4
+	rcall 1f
+	ldi tmp1, 8*4
+	rcall 1f
+	ldi tmp1, 13*4
+	rcall 1f
+	rjmp 2f
+1:		/* this might be "outsourced" to save the jump above */
+	add tmp1, S
+	andi tmp1, 0x3f	/* wrap around inside the 64 byte w[] buffer */
+	movw r26, W1
+	add r26, tmp1
+	adc r27, xNULL
+	ld tmp2, X+
+	eor T1, tmp2
+	ld tmp2, X+
+	eor T2, tmp2
+	ld tmp2, X+
+	eor T3, tmp2
+	ld tmp2, X+
+	eor T4, tmp2
+	ret
+2:	/* now we just hav to do a ROTL(T) and save T back */
+	mov tmp2, T4
+	rol tmp2	/* puts the top bit of T into the carry for the rotate */
+	rol T1
+	rol T2
+	rol T3
+	rol T4
+	movw r26, W1
+	add r26, S
+	adc r27, xNULL
+	st X+, T1
+	st X+, T2
+	st X+, T3
+	st X+, T4
+
+sha1_nextBlock_mainloop_core:	/* ther core function; T=ROTL5(a) ....*/
+								/* T already contains w[s] */
+	movw r26, W1
+	sbiw r26, 4*1		/* X points at a[4] aka e */
+	ld tmp1, X+
+	add T1, tmp1
+	ld tmp1, X+
+	adc T2, tmp1
+	ld tmp1, X+
+	adc T3, tmp1
+	ld tmp1, X+
+	adc T4, tmp1		/* T = w[s]+e */
+	sbiw r26, 4*5		/* X points at a[0] aka a */
+	ld F1, X+
+	ld F2, X+
+	ld F3, X+
+	ld F4, X+
+	mov tmp1, F4		/* X points at a[1] aka b */
+	ldi tmp2, 5
+1:
+	rol tmp1	/* tmp1 tracks the top byte so each step rotates in the right bit */
+	rol F1
+	rol F2
+	rol F3
+	rol F4
+	dec tmp2
+	brne 1b
+
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+
+	/* select k_t and f_t: xtmp is the current entry of xTable / KTable / JumpTable */
+	ldi r30, lo8(sha1_nextBlock_xTable)
+	ldi r31, hi8(sha1_nextBlock_xTable)
+	add r30, xtmp
+	adc r31, xNULL
+	lpm tmp1, Z
+	cp tmp1, LoopC	/* reached the round number listed in xTable? */
+	brne 1f
+	inc xtmp	/* yes: switch to the next constant and function */
+1:	ldi r30, lo8(sha1_nextBlock_KTable)
+	ldi r31, hi8(sha1_nextBlock_KTable)
+	lsl xtmp
+	lsl xtmp
+	add r30, xtmp
+	adc r31, xNULL
+	lsr xtmp
+	lsr xtmp
+
+	lpm tmp1, Z+
+	add T1, tmp1
+	lpm tmp1, Z+
+	adc T2, tmp1
+	lpm tmp1, Z+
+	adc T3, tmp1
+	lpm tmp1, Z+
+	adc T4, tmp1
+			/* T = ROTL(a,5) + e + kt + w[s] */
+
+	/* Z-4 is just pointing to kt ... */
+	movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */
+	lsr r31		/* icall takes a word address, so halve the byte address */
+	ror r30
+
+	icall		/* one call per byte of f_t(b,c,d); each call advances Y by one */
+	mov F1, tmp1
+	icall
+	mov F2, tmp1
+	icall
+	mov F3, tmp1
+	icall
+
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+				 /* X points still at a[1] aka b, Y points at a[2] aka c */
+	/* update a[] */
+sha1_nextBlock_update_a:
+	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+	//adiw r28, 3*4 /* Y should point at a[4] aka e */
+	movw r28, W1
+	sbiw r28, 4
+
+	ldi tmp2, 4*4
+1:
+	ld tmp1, -Y
+	std Y+4, tmp1
+	dec tmp2
+	brne 1b
+	/* Y points at a[0] aka a*/
+
+	movw r28, W1
+	sbiw r28, 5*4
+	/* store T in a[0] aka a */
+	st Y+, T1
+	st Y+, T2
+	st Y+, T3
+	st Y+, T4
+	/* Y points at a[1] aka b*/
+
+	/* rotate c right by 2 (the same as left by 30) */
+	ldd T1, Y+1*4
+	ldd T2, Y+1*4+1
+	ldd T3, Y+1*4+2
+	ldd T4, Y+1*4+3
+	mov tmp1, T1
+	ldi tmp2, 2
+1:	ror tmp1
+	ror T4
+	ror T3
+	ror T2
+	ror T1
+	dec tmp2
+	brne 1b
+	std Y+1*4+0, T1
+	std Y+1*4+1, T2
+	std Y+1*4+2, T3
+	std Y+1*4+3, T4
+
+	push r27	/* debug output of a[]; dbg_hexdump expands to nothing */
+	push r26
+	movw r26, W1
+	sbiw r26, 4*5
+	dbg_hexdump 4*5
+	pop r26
+	pop r27
+
+	inc LoopC
+	cpi LoopC, 80
+	brge 1f
+	rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:
+   /* littel patch */
+	sbiw r28, 4
+
+/* add a[] to state and inc length */
+	pop r27
+	pop r26		/* now X points to state (and Y still at a[0]) */
+	ldi tmp4, 5
+1:	clc		/* every 32-bit word is added separately */
+	ldi tmp3, 4
+2:	ld tmp1, X
+	ld tmp2, Y+
+	adc tmp1, tmp2
+	st X+, tmp1
+	dec tmp3
+	brne 2b
+	dec tmp4
+	brne 1b
+
+	/* now length += 512 */
+	adiw r26, 1 /* we skip the least significant byte */
+	ld tmp1, X
+	ldi tmp2, hi8(512) /* 2 */
+	add tmp1, tmp2
+	st X+, tmp1
+	ldi tmp2, 6
+1:
+	ld tmp1, X
+	adc tmp1, xNULL
+	st X+, tmp1
+	dec tmp2
+	brne 1b
+
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+	pop r21		/* old SP saved in the prolog */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	ret
+
+sha1_nextBlock_xTable:
+.byte 20,40,60,0
+sha1_nextBlock_KTable:
+.int	0x5a827999
+.int	0x6ed9eba1
+.int	0x8f1bbcdc
+.int	0xca62c1d6
+sha1_nextBlock_JumpTable:
+rjmp sha1_nextBlock_Ch
+	nop
+rjmp sha1_nextBlock_Parity
+	nop
+rjmp sha1_nextBlock_Maj
+	nop
+rjmp sha1_nextBlock_Parity
+
+	 /* X and Y still point at a[1] aka b ; return value in tmp1 */
+sha1_nextBlock_Ch:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	com tmp2
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp3, Y+7	/* load from d */
+	and tmp2, tmp3
+	eor tmp1, tmp2	/* (b & c) ^ (~b & d) */
+	ret
+
+sha1_nextBlock_Maj:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp4, Y+7	/* load from d */
+	and tmp2, tmp4
+	eor tmp1, tmp2
+	and tmp3, tmp4
+	eor tmp1, tmp3	/* (b & c) ^ (b & d) ^ (c & d) */
+	ret
+
+sha1_nextBlock_Parity:
+	ld tmp1, Y+
+	ldd tmp2, Y+3	/* load from c */
+	eor tmp1, tmp2
+	ldd tmp2, Y+7	/* load from d */
+	eor tmp1, tmp2	/* b ^ c ^ d */
+	ret
+/*
+ch_str:			.asciz "\r\nCh"
+maj_str:		.asciz "\r\nMaj"
+parity_str:		.asciz "\r\nParity"
+*/
+;###########################################################
+
+.global sha1_init
+;void sha1_init(sha1_ctx_t *state){
+;	DEBUG_S("\r\nSHA1_INIT");
+;	state->h[0] = 0x67452301;
+;	state->h[1] = 0xefcdab89;
+;	state->h[2] = 0x98badcfe;
+;	state->h[3] = 0x10325476;
+;	state->h[4] = 0xc3d2e1f0;
+;	state->length = 0;
+;}
+; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifys: X(r26,r27), Z(r30,r31), r22, r23; expects r1 to be zero
+sha1_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha1_init_vector))
+	ldi r31, hi8((sha1_init_vector))
+	ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:
+	lpm r23, Z+
+	st X+, r23
+	dec r22
+	brne sha1_init_vloop
+	ldi r22, 8	/* the 8 bytes of state->length */
+sha1_init_lloop:
+	st X+, r1
+	dec r22
+	brne sha1_init_lloop
+	ret
+
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 
0xc3d2e1f0; + diff --git a/shacal1/sha1.h b/shacal1/sha1.h new file mode 100644 index 0000000..6675d20 --- /dev/null +++ b/shacal1/sha1.h @@ -0,0 +1,117 @@ +/* sha1.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/** + * \file sha1.h + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2006-10-08 + * \license GPLv3 or later + * \brief SHA-1 declaration. 
+ * \ingroup SHA-1 + * + */ + +#ifndef SHA1_H_ +#define SHA1_H_ + +#include +/** \def SHA1_HASH_BITS + * definees the size of a SHA-1 hash in bits + */ + +/** \def SHA1_HASH_BYTES + * definees the size of a SHA-1 hash in bytes + */ + +/** \def SHA1_BLOCK_BITS + * definees the size of a SHA-1 input block in bits + */ + +/** \def SHA1_BLOCK_BYTES + * definees the size of a SHA-1 input block in bytes + */ +#define SHA1_HASH_BITS 160 +#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8) +#define SHA1_BLOCK_BITS 512 +#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8) + +/** \typedef sha1_ctx_t + * \brief SHA-1 context type + * + * A vatiable of this type may hold the state of a SHA-1 hashing process + */ +typedef struct { + uint32_t h[5]; + uint64_t length; +} sha1_ctx_t; + +/** \typedef sha1_hash_t + * \brief hash value type + * A variable of this type may hold a SHA-1 hash value + */ +typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8]; + +/** \fn sha1_init(sha1_ctx_t *state) + * \brief initializes a SHA-1 context + * This function sets a ::sha1_ctx_t variable to the initialization vector + * for SHA-1 hashing. + * \param state pointer to the SHA-1 context variable + */ +void sha1_init(sha1_ctx_t *state); + +/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block) + * \brief process one input block + * This function processes one input block and updates the hash context + * accordingly + * \param state pointer to the state variable to update + * \param block pointer to the message block to process + */ +void sha1_nextBlock (sha1_ctx_t *state, const void* block); + +/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b) + * \brief processes the given block and finalizes the context + * This function processes the last block in a SHA-1 hashing process. + * The block should have a maximum length of a single input block. 
+ * \param state pointer to the state variable to update and finalize + * \param block pointer to themessage block to process + * \param length_b length of the message block in bits + */ +void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b); + +/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state) + * \brief convert a state variable into an actual hash value + * Writes the hash value corresponding to the state to the memory pointed by dest. + * \param dest pointer to the hash value destination + * \param state pointer to the hash context + */ +void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state); + +/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b) + * \brief hashing a message which in located entirely in RAM + * This function automatically hashes a message which is entirely in RAM with + * the SHA-1 hashing algorithm. + * \param dest pointer to the hash value destination + * \param msg pointer to the message which should be hashed + * \param length_b length of the message in bits + */ +void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b); + + + +#endif /*SHA1_H_*/ diff --git a/shacal1_enc.c b/shacal1/shacal1_enc.c similarity index 100% rename from shacal1_enc.c rename to shacal1/shacal1_enc.c diff --git a/shacal1_enc.h b/shacal1/shacal1_enc.h similarity index 100% rename from shacal1_enc.h rename to shacal1/shacal1_enc.h diff --git a/shacal2/sha256-asm.S b/shacal2/sha256-asm.S new file mode 100644 index 0000000..d9eb6b6 --- /dev/null +++ b/shacal2/sha256-asm.S @@ -0,0 +1,1042 @@ +/* sha256-asm.S */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS  = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31, then clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall
+	pop r31		/* restores what precall pushed, in reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X (pointer to the data) across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* first argument of uart_hexdump: the pointer from X */
+.if \length > 16
+	ldi r22, lo8(16)	/* more than 16 bytes: pass 16 as second argument ... */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16	/* ... and expand the macro again for the rest */
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* X points to Block */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22	; X = state
+	movw r30, r24	; Z = destination
+	ldi r21, 8	; eight 32-bit words h0..h7
+	sbiw r26, 4	; bias X so that the adiw below lands one past the current word
+1:
+	ldi r20, 4
+	adiw r26, 8	; X = one past the end of the next word
+2:
+	ld r0, -X	; read the word backwards ...
+	st Z+, r0	; ... and write it forwards: the bytes of each word are reversed
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM (the context lives on the stack)
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8	/* reserve sizeof(sha256_ctx_t) = 40 bytes on the stack */
+	sbci r17, 0
+	in r0, SREG
+	cli		/* SP must not be used by an interrupt while half updated */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25	/* keep the destination pointer for sha256_ctx2hash */
+	push r24
+	subi r16, lo8(-(1))	/* r17:r16 = SP+1 = address of the context; 16-bit add, */
+	sbci r17, hi8(-(1))	/* because inc does not produce a carry for the high byte */
+
+	movw r8, r18 /* backup of length: r11:r10:r9:r8 */
+	movw r10, r20
+
+	movw r12, r22	/* backup of msg-ptr in r13:r12 (call-saved) */
+
+	movw r24, r16
+	rcall sha256_init
+		/* while length >= 512: hash one full block */
+1:
+	tst r11		/* any of the two upper length bytes set: surely >= 512 bits */
+	brne 2f
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02	/* otherwise compare bits 8..15 against 512/256 */
+	brlo 4f		/* fewer than 512 bits left: finish with sha256_lastBlock */
+
+2:	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64	/* msg-ptr += 64 bytes, in the call-saved copy that is */
+	add r12, r19	/* reloaded at the top of the loop */
+	adc r13, r1	/* relies on r1 == 0, like the length update below */
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8	/* remaining length, now below 512 bits, fits into 16 bit */
+	rcall sha256_lastBlock
+
+	pop r24		/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash
+
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8	/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. 
for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to the remaining message bytes (full 64 byte blocks are hashed first)
+;	given in r23,r22
+;  param3: an 16-bit integer specifing length of block in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02	/* 512 bits or more left? */
+	brlo sha256_lastBlock_prolog
+	push r25	/* yes: hash one full block, keeping all three parameters */
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02		/* length -= 512 bits */
+	subi r22, lo8(-(64))	/* block pointer += 64 bytes (one block), */
+	sbci r23, hi8(-(64))	/* not 512 bytes as "subi r23, -2" did */
+	rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 temporarily holds SREG; it is zeroed again before use as zero */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the length is bit 5 of length/8 */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18	/* r1 is the copy counter and ends up zero */
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18		/* r18 = number of bytes used in the block so far */
+
+/* checking stuff here */
+	cpi r18, 64-8+1	/* no room left for the 8 byte length field? */
+	brsh 0f
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* oh shit, we landed here */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30, 1	/* Z back at the begin of the block */
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+		/* now we should subtract 512 from length */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0	/* propagate the borrow; dec below keeps the carry flag */
+	st X+, r19
+	dec r18
+	brne 1b
+
+;	clr r18 /* not neccessary ;-) */
+	/* reset Z pointer to begin of block */
+
+sha256_lastBlock_insert_zeros:
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + bits of this block, */
+	st -Z, r0	/* stored most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8	/* Z back at the begin of the block */
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30, 1  ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1 = 4
+T2 = 5
+T3 = 6
+T4 = 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? 
*/ + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack + movw r30, r22 ; Z points to message + subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha256_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + push r18 + push r19 + push r24 + push r25 /* param1 will be needed later */ + ; now we fill the w array with message (think about endianess) + adiw r26, 1 ; X++ + ldi r20, 16 +sha256_nextBlock_wcpyloop: + ld r23, Z+ + ld r22, Z+ + ld r19, Z+ + ld r18, Z+ + st X+, r18 + st X+, r19 + st X+, r22 + st X+, r23 + dec r20 + brne sha256_nextBlock_wcpyloop +/* for (i=16; i<64; ++i){ + w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16]; + } */ + /* r25,r24,r23,r24 (r21,r20) are function values + r19,r18,r17,r16 are the accumulator + r15,r14,r13,rBck1 are backup1 + r11,r10,r9 ,r8 are xor accu + r1 is round counter */ + + ldi r20, 64-16 + mov LoopC, r20 +sha256_nextBlock_wcalcloop: + movw r30, r26 ; cp X to Z + sbiw r30, 63 + sbiw r30, 1 ; substract 64 = 16*4 + ld Accu1, Z+ + ld Accu2, Z+ + ld Accu3, Z+ + ld Accu4, Z+ /* w[i] = w[i-16] */ + ld Bck1, Z+ + ld Bck2, Z+ + ld Bck3, Z+ + ld Bck4, Z+ /* backup = w[i-15] */ + /* now sigma 0 */ + mov Func1, Bck2 + mov Func2, Bck3 + mov Func3, Bck4 + mov Func4, Bck1 /* prerotated by 8 */ + ldi r20, 1 + rcall bitrotl + movw XAccu1, Func1 + movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/ 
+sigma0_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + ror Bck1 + dec Func2 + brne sigma0_shr + eor XAccu1, Bck1 + eor XAccu2, Bck2 + eor XAccu3, Bck3 + eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + ldd Func1, Z+7*4 /* now accu += w[i-7] */ + ldd Func2, Z+7*4+1 + ldd Func3, Z+7*4+2 + ldd Func4, Z+7*4+3 + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + ldd Bck1, Z+12*4 /* now backup = w[i-2]*/ + ldd Bck2, Z+12*4+1 + ldd Bck3, Z+12*4+2 + ldd Bck4, Z+12*4+3 + /* now sigma 1 */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 1 + rcall bitrotr + movw XAccu3, Func3 + movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */ +; movw Func1, Bck3 +; movw Func3, Bck1 /* prerotated by 16 */ + ldi r20, 2 + rcall bitrotr + eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/ + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + ldi Func2, 2 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/ +sigma1_shr: + lsr Bck4 + ror Bck3 + ror Bck2 + dec Func2 + brne sigma1_shr + eor XAccu1, Bck2 + eor XAccu2, Bck3 + eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */ + add Accu1, XAccu1 + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 /* finished with sigma0 */ + /* now let's store the shit */ + st X+, Accu1 + st X+, Accu2 + st X+, Accu3 + st X+, Accu4 + dec LoopC + breq 3f ; skip if zero + rjmp sha256_nextBlock_wcalcloop +3: + /* we are finished with w array X points one byte post w */ +/* init a array */ + pop r31 + pop r30 + push r30 + push r31 + ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */ +init_a_array: + ld r1, Z+ + st X+, r1 + dec r25 + brne init_a_array + +/* now the real fun begins */ +/* for (i=0; i<64; ++i){ + t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i]; + t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]); + 
memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; + a[4] += t1; + a[0] = t1 + t2; + } */ + /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */ + sbiw r26, 8*4 /* X still points at a[7]+1*/ + movw r28, r26 + ldi r30, lo8(sha256_kv) + ldi r31, hi8(sha256_kv) + dec r27 /* X - (64*4 == 256) */ + ldi r25, 64 + mov LoopC, r25 +sha256_main_loop: + /* now calculate t1 */ + /*CH(x,y,z) = (x&y)^((~x)&z)*/ + ldd T1, Y+5*4 + ldd T2, Y+5*4+1 + ldd T3, Y+5*4+2 + ldd T4, Y+5*4+3 /* y in T */ + ldd Func1, Y+4*4 + ldd Func2, Y+4*4+1 + ldd Func3, Y+4*4+2 + ldd Func4, Y+4*4+3 /* x in Func */ + ldd Bck1, Y+6*4 + ldd Bck2, Y+6*4+1 + ldd Bck3, Y+6*4+2 + ldd Bck4, Y+6*4+3 /* z in Bck */ + and T1, Func1 + and T2, Func2 + and T3, Func3 + and T4, Func4 + com Func1 + com Func2 + com Func3 + com Func4 + and Bck1, Func1 + and Bck2, Func2 + and Bck3, Func3 + and Bck4, Func4 + eor T1, Bck1 + eor T2, Bck2 + eor T3, Bck3 + eor T4, Bck4 /* done, CH(x,y,z) is in T */ + /* now SIGMA1(a[4]) */ + ldd Bck4, Y+4*4 /* think about using it from Func reg above*/ + ldd Bck1, Y+4*4+1 + ldd Bck2, Y+4*4+2 + ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */ + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotl /* rotr(x,6) */ + movw XAccu1, Func1 + movw XAccu3, Func3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 3 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 + movw Func1, Bck3 /* this prerotates furteh 16 bits*/ + movw Func3, Bck1 /* so we have now prerotated by 24 bits*/ + ldi r20, 1 + rcall bitrotr /* rotr(x,11) */ + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* finished with SIGMA1, add it to T */ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 + /* now we've to add a[7], w[i] and k[i] */ + ldd XAccu1, Y+4*7 + ldd XAccu2, Y+4*7+1 + ldd XAccu3, Y+4*7+2 + ldd XAccu4, Y+4*7+3 + add T1, 
XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add a[7] */ + ld XAccu1, X+ + ld XAccu2, X+ + ld XAccu3, X+ + ld XAccu4, X+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add w[i] */ + lpm XAccu1, Z+ + lpm XAccu2, Z+ + lpm XAccu3, Z+ + lpm XAccu4, Z+ + add T1, XAccu1 + adc T2, XAccu2 + adc T3, XAccu3 + adc T4, XAccu4 /* add k[i] */ /* finished with t1 */ + /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/ + /* starting with MAJ(x,y,z) */ + ldd Func1, Y+4*0+0 + ldd Func2, Y+4*0+1 + ldd Func3, Y+4*0+2 + ldd Func4, Y+4*0+3 /* load x=a[0] */ + ldd XAccu1, Y+4*1+0 + ldd XAccu2, Y+4*1+1 + ldd XAccu3, Y+4*1+2 + ldd XAccu4, Y+4*1+3 /* load y=a[1] */ + and XAccu1, Func1 + and XAccu2, Func2 + and XAccu3, Func3 + and XAccu4, Func4 /* XAccu == (x & y) */ + ldd Bck1, Y+4*2+0 + ldd Bck2, Y+4*2+1 + ldd Bck3, Y+4*2+2 + ldd Bck4, Y+4*2+3 /* load z=a[2] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */ + ldd Func1, Y+4*1+0 + ldd Func2, Y+4*1+1 + ldd Func3, Y+4*1+2 + ldd Func4, Y+4*1+3 /* load y=a[1] */ + and Func1, Bck1 + and Func2, Bck2 + and Func3, Bck3 + and Func4, Bck4 + eor XAccu1, Func1 + eor XAccu2, Func2 + eor XAccu3, Func3 + eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */ + /* SIGMA0(a[0]) */ + ldd Bck1, Y+4*0+0 /* we should combine this with above */ + ldd Bck2, Y+4*0+1 + ldd Bck3, Y+4*0+2 + ldd Bck4, Y+4*0+3 + movw Func1, Bck1 + movw Func3, Bck3 + ldi r20, 2 + rcall bitrotr + movw Accu1, Func1 + movw Accu3, Func3 /* Accu = shr(a[0], 2) */ + movw Func1, Bck3 + movw Func3, Bck1 /* prerotate by 16 bits */ + ldi r20, 3 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */ + mov Func1, Bck4 + mov Func2, Bck1 + mov Func3, Bck2 + mov Func4, Bck3 /* prerotate by 24 
bits */ + ldi r20, 2 + rcall bitrotl + eor Accu1, Func1 + eor Accu2, Func2 + eor Accu3, Func3 + eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */ + add Accu1, XAccu1 /* add previous result (MAJ)*/ + adc Accu2, XAccu2 + adc Accu3, XAccu3 + adc Accu4, XAccu4 + /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/ + /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */ + + ldi r21, 7*4 + adiw r28, 7*4 +a_shift_loop: + ld r25, -Y /* warning: this is PREdecrement */ + std Y+4, r25 + dec r21 + brne a_shift_loop + + ldd Bck1, Y+4*4+0 + ldd Bck2, Y+4*4+1 + ldd Bck3, Y+4*4+2 + ldd Bck4, Y+4*4+3 + add Bck1, T1 + adc Bck2, T2 + adc Bck3, T3 + adc Bck4, T4 + std Y+4*4+0, Bck1 + std Y+4*4+1, Bck2 + std Y+4*4+2, Bck3 + std Y+4*4+3, Bck4 + add Accu1, T1 + adc Accu2, T2 + adc Accu3, T3 + adc Accu4, T4 + std Y+4*0+0, Accu1 + std Y+4*0+1, Accu2 + std Y+4*0+2, Accu3 + std Y+4*0+3, Accu4 /* a array updated */ + + + dec LoopC + breq update_state + rjmp sha256_main_loop ;brne sha256_main_loop +update_state: + /* update state */ + /* pointers to state should still exist on the stack ;-) */ + pop r31 + pop r30 + ldi r21, 8 +update_state_loop: + ldd Accu1, Z+0 + ldd Accu2, Z+1 + ldd Accu3, Z+2 + ldd Accu4, Z+3 + ld Func1, Y+ + ld Func2, Y+ + ld Func3, Y+ + ld Func4, Y+ + add Accu1, Func1 + adc Accu2, Func2 + adc Accu3, Func3 + adc Accu4, Func4 + st Z+, Accu1 + st Z+, Accu2 + st Z+, Accu3 + st Z+, Accu4 + dec r21 + brne update_state_loop + /* now we just have to update the length */ + adiw r30, 1 /* since we add 512, we can simply skip the LSB */ + ldi r21, 2 + ldi r22, 6 + ld r20, Z + add r20, r21 + st Z+, r20 + clr r21 +sha256_nextBlock_fix_length: + brcc sha256_nextBlock_epilog + ld r20, Z + adc r20, r21 + st Z+, r20 + dec r22 + brne sha256_nextBlock_fix_length + +; EPILOG +sha256_nextBlock_epilog: +/* now we should clean up the stack */ + + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out 
SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + ret + +sha256_kv: ; 64 round constants k[i] stored in ProgMem, each 32-bit value as two .word entries, low half first +.word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c +.word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b +.word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9 +.word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429 +.word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272 +.word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a +.word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e +.word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671 + + +;########################################################### + +.global sha256_init +;uint32_t sha256_init_vector[]={ +; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, +; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; +; +;void sha256_init(sha256_ctx_t *state){ +; state->length=0; +; memcpy(state->h, sha256_init_vector, 8*4); +;} +; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram +; modifies: X(r26,r27), Z(r30,r31), r22, r23 +sha256_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha256_init_vector)) + ldi r31, hi8((sha256_init_vector)) + ldi r22, 32+8 ; byte count: 32 bytes of h[] plus the 8 zero bytes of length +sha256_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne
sha256_init_vloop + ret + +sha256_init_vector: ; initial hash words (low 16-bit half first), followed by 8 zero bytes for the length field +.word 0xE667, 0x6A09 +.word 0xAE85, 0xBB67 +.word 0xF372, 0x3C6E +.word 0xF53A, 0xA54F +.word 0x527F, 0x510E +.word 0x688C, 0x9B05 +.word 0xD9AB, 0x1F83 +.word 0xCD19, 0x5BE0 +.word 0x0000, 0x0000 +.word 0x0000, 0x0000 + +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifies: r20, r21 and r22..r25 (the rotated word is left in r25..r22) +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotl32 +bitrotl: ; bit-wise part, reached from rotl32 once r20 < 8; r21 collects the bits shifted out of r25 + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + rjmp bitrotl_loop +fixrotl: + or r22, r21 + ret + + +;########################################################### + +.global rotr32 +; === ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifies: r20, r21 and r22..r25 (the rotated word is left in r25..r22) +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: ; bit-wise part, reached from rotr32 once r20 < 8; r21 collects the bits shifted out of r22 + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + rjmp bitrotr_loop +fixrotr: + or r25, r21 + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianness of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,r22 (r25 is most significant) +; modifies: r20, r21 and r22..r25 (the byte-swapped word is left in r25..r22) +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + mov r23, r24 + mov r24, r21 + mov r25, r20 + ret + diff --git
a/shacal2/sha256.h b/shacal2/sha256.h new file mode 100644 index 0000000..24960a3 --- /dev/null +++ b/shacal2/sha256.h @@ -0,0 +1,122 @@ +/* sha256.h */ +/* + This file is part of the AVR-Crypto-Lib. + Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * \file sha256.h + * \author Daniel Otte + * \date 2006-05-16 + * \license GPLv3 or later + * + */ + +#ifndef SHA256_H_ +#define SHA256_H_ + +#define __LITTLE_ENDIAN__ + + +#include <stdint.h> /* uint8_t, uint16_t, uint32_t, uint64_t used below */ + +/** \def SHA256_HASH_BITS + * defines the size of a SHA-256 hash value in bits + */ + +/** \def SHA256_HASH_BYTES + * defines the size of a SHA-256 hash value in bytes + */ + +/** \def SHA256_BLOCK_BITS + * defines the size of a SHA-256 input block in bits + */ + +/** \def SHA256_BLOCK_BYTES + * defines the size of a SHA-256 input block in bytes + */ + +#define SHA256_HASH_BITS 256 +#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8) +#define SHA256_BLOCK_BITS 512 +#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8) + +/** \typedef sha256_ctx_t + * \brief SHA-256 context type + * + * A variable of this type may hold the state of a SHA-256 hashing process + */ +typedef struct { + uint32_t h[8]; /**< intermediate hash value (eight 32-bit words) */ + uint64_t length; /**< number of message bits processed so far */ +} sha256_ctx_t; + +/** \typedef sha256_hash_t + * \brief SHA-256 hash value type + * + * A variable of this type may hold the hash value produced by the + * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */ +typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES]; + +/** \fn void sha256_init(sha256_ctx_t *state) + * \brief initialise a SHA-256 context + * + * This function sets a ::sha256_ctx_t to the initial values for hashing. + * \param state pointer to the SHA-256 hashing context + */ +void sha256_init(sha256_ctx_t *state); + +/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block) + * \brief update the context with a given block + * + * This function updates the SHA-256 hash context by processing the given block + * of fixed length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the block of fixed length (512 bit = 64 byte) + */ +void sha256_nextBlock (sha256_ctx_t* state, const void* block); + +/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b) + * \brief finalize the context with the given block + * + * This function finalizes the SHA-256 hash context by processing the given block + * of variable length. + * \param state pointer to the SHA-256 hash context + * \param block pointer to the final block of variable length (its size is given by length_b) + * \param length_b the length of the block in bits + */ +void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b); + +/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) + * \brief convert the hash state into the hash value + * This function reads the context and writes the hash value to the destination + * \param dest pointer to the location where the hash value should be written + * \param state pointer to the SHA-256 hash context + */ +void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state); + +/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b) + * \brief simple SHA-256 hashing function for direct hashing + * + * This function automatically hashes a given message of arbitrary length with + * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to + * \param msg pointer to the message that is going to be hashed + * \param length_b length of the message in bits + */ +void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b); + +#endif /*SHA256_H_*/ diff --git a/shacal2_enc.c b/shacal2/shacal2_enc.c similarity index 100% rename from shacal2_enc.c rename to shacal2/shacal2_enc.c diff --git a/shacal2_enc.h b/shacal2/shacal2_enc.h similarity index 100% rename from shacal2_enc.h rename to shacal2/shacal2_enc.h diff --git a/skipjack.c b/skipjack/skipjack.c similarity index 100% rename from skipjack.c rename to skipjack/skipjack.c diff --git a/skipjack.h b/skipjack/skipjack.h similarity index 100% rename from skipjack.h rename to skipjack/skipjack.h diff --git a/test_src/main-hmac-md5-test.c b/test_src/main-hmac-md5-test.c index dfbcead..f61e119 100644 --- a/test_src/main-hmac-md5-test.c +++ b/test_src/main-hmac-md5-test.c @@ -28,9 +28,10 @@ #include "md5.h" #include "hmac-md5.h" +/* #include "base64_enc.h" #include "base64_dec.h" - +*/ #include "nessie_mac_test.h" #include @@ -109,6 +110,7 @@ void strhexdump(char* dest, void* src, uint16_t length){ } } +/* void cram_md5_interactive(void){ char key[101]; char msg_b64[101]; @@ -137,9 +139,9 @@ void cram_md5_interactive(void){ cli_putstr_P(PSTR("\r\nresponse: ")); cli_hexdump(hmac, HMAC_MD5_BYTES); cli_putstr_P(PSTR("\r\nresponse (b64): ")); - cli_putstr(msg_b64); - + cli_putstr(msg_b64); } +*/ void md5_interactive(void){ @@ -164,7 +166,7 @@ const char test_str[] PROGMEM = "test"; /* const char performance_str[] PROGMEM = "performance"; */ const char echo_str[] PROGMEM = "echo"; const char hmd5i_str[] PROGMEM = "hmac-md5"; -const char crammd5i_str[] PROGMEM = "cram-md5"; +/* const char crammd5i_str[] PROGMEM = "cram-md5"; */ const char md5i_str[] PROGMEM = "md5"; @@ -172,7 +174,7 @@ cmdlist_entry_t cmdlist[] PROGMEM = { { nessie_str, NULL, testrun_nessie_hmacmd5}, {
test_str, NULL, testrun_test_hmacmd5}, { hmd5i_str, NULL, hmacmd5_interactive}, - { crammd5i_str, NULL, cram_md5_interactive}, +/* { crammd5i_str, NULL, cram_md5_interactive}, */ { md5i_str, NULL, md5_interactive}, /* { performance_str, NULL, testrun_performance_hmacmd5}, */ { echo_str, (void*)1, (void_fpt)echo_ctrl}, diff --git a/trivium.c b/trivium/trivium.c similarity index 100% rename from trivium.c rename to trivium/trivium.c diff --git a/trivium.h b/trivium/trivium.h similarity index 100% rename from trivium.h rename to trivium/trivium.h diff --git a/xtea-asm.S b/xtea/xtea-asm.S similarity index 100% rename from xtea-asm.S rename to xtea/xtea-asm.S diff --git a/xtea.c b/xtea/xtea.c similarity index 100% rename from xtea.c rename to xtea/xtea.c diff --git a/xtea.h b/xtea/xtea.h similarity index 100% rename from xtea.h rename to xtea/xtea.h -- 2.39.5