From: bg Date: Fri, 20 Oct 2006 21:03:55 +0000 (+0000) Subject: added sha1 in C and AVR-ASM X-Git-Url: https://git.cryptolib.org/?a=commitdiff_plain;h=6e51024d966bd015cb8f9c8460c5c21da6a08f9e;p=avr-crypto-lib.git added sha1 in C and AVR-ASM --- diff --git a/Makefile b/Makefile index b207adb..1dbe4e8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -PRG = rc6 +PRG = sha1 # cryptotest -OBJ = main-rc6-test.o debug.o uart.o serial-tools.o rc6.o +OBJ = main-sha1-test.o debug.o uart.o serial-tools.o sha1-asm.o # main-md5-test.o debug.o uart.o serial-tools.o md5.o # main-cast5-test.o debug.o uart.o serial-tools.o cast5.o # main.o debug.o uart.o serial-tools.o sha256-asm.o xtea-asm.o arcfour-asm.o prng.o cast5.o diff --git a/config.h b/config.h index 58e9b04..a416ef0 100644 --- a/config.h +++ b/config.h @@ -2,13 +2,13 @@ #define __CONFIG_H__ #include -#define F_CPU 16000000 // Oszillator-Frequenz in Hz +#define F_CPU 8000000 // Oszillator-Frequenz in Hz #define DEBUG uart //c uart.[ch] defines #define UART_INTERRUPT 1 -#define UART_BAUD_RATE 2400 +#define UART_BAUD_RATE 38400 #define UART_RXBUFSIZE 16 #define UART_TXBUFSIZE 16 #define UART_LINE_BUFFER_SIZE 40
diff --git a/main-sha1-test.c b/main-sha1-test.c
new file mode 100644
index 0000000..2bfbf17
--- /dev/null
+++ b/main-sha1-test.c
@@ -0,0 +1,101 @@
+/*
+ * SHA-1 test-suite
+ *
+*/
+
+#include "config.h"
+#include "serial-tools.h"
+#include "uart.h"
+#include "debug.h"
+
+#include "sha1.h"
+
+#include <stdint.h>
+#include <string.h>
+
+
+/*****************************************************************************
+ *  additional validation-functions                                          *
+ *****************************************************************************/
+
+/*****************************************************************************
+ *  self tests                                                               *
+ *****************************************************************************/
+
+/*
+ * Writes the five chaining values and the length field of a SHA-1 context
+ * to the UART.
+ */
+void sha1_ctx_dump(sha1_ctx_t *s){
+	uint8_t i;
+	uart_putstr("\r\n==== sha1_ctx_dump ====");
+	for(i=0;i<5;++i){
+		uart_putstr("\r\na["); uart_hexdump(&i, 1); uart_putstr("]: ");
+		uart_hexdump(&(s->h[i]), 4);
+	}
+	/* dump the 8 byte length field itself, not 8 bytes starting at the
+	 * one byte loop counter */
+	uart_putstr("\r\nlength"); uart_hexdump(&(s->length), 8);
+}
+
+/*
+ * Hashes "abc", the 448 bit test string and one million times 'a' and
+ * writes every digest to the UART.
+ */
+void testrun_sha1(void){
+	sha1_hash_t hash;
+	sha1(&hash,"abc",3*8);
+	uart_putstr("\r\nsha1(\"abc\") = \r\n\t");
+	uart_hexdump(hash,SHA1_HASH_BITS/8);
+
+	sha1(&hash,"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",448);
+	uart_putstr("\r\nsha1(\"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq\") = \r\n\t");
+	uart_hexdump(hash,SHA1_HASH_BITS/8);
+
+	uart_putstr("\r\nsha1(1,000,000 * 'a') = \r\n\t");
+	{
+		uint8_t block[SHA1_BLOCK_BITS/8];
+		uint16_t i;
+		sha1_ctx_t s;
+		memset(block,'a',SHA1_BLOCK_BITS/8);
+		sha1_init(&s);
+		for(i=0;i<15625; ++i){ /* (1000000/(SHA1_BLOCK_BITS/8)) */
+			sha1_nextBlock(&s, block);
+		}
+		sha1_lastBlock(&s,block,0);
+		sha1_ctx2hash(&hash, &s);
+	}
+	uart_hexdump(hash,SHA1_HASH_BITS/8);
+
+
+	uart_putstr("\r\nx");
+}
+
+
+
+/*****************************************************************************
+ *  main                                                                     *
+ *****************************************************************************/
+
+int main (void){
+	char str[20];
+
+	DEBUG_INIT();
+	uart_putstr("\r\n");
+
+	uart_putstr("\r\n\r\nCrypto-VS (SHA-1)\r\nloaded and running\r\n");
+
+	/* the word "test" starts the self test; a failed getnextwordn() or
+	 * any other word is answered with ERROR */
+	while(1){
+		if (!getnextwordn(str,20))  {DEBUG_S("DBG: W1\r\n"); goto error;}
+		if (strcmp(str, "test")) {DEBUG_S("DBG: 1b\r\n"); goto error;}
+		testrun_sha1();
+		continue;
+	error:
+		uart_putstr("ERROR\r\n");
+	}
+
+
+}
+
diff --git a/sha1-asm.S b/sha1-asm.S new file mode 100644 index 0000000..278d52a --- /dev/null +++ b/sha1-asm.S @@ -0,0 +1,978 @@ +/* + * Author: Daniel Otte + * + * License: GPL +*/ +; SHA1 implementation in assembler for AVR +SHA1_BLOCK_BITS = 512 +SHA1_HASH_BITS = 160 + +.macro precall + /* push r18 - r27, r30 - r31*/ + push r0 + push r1 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 + push r26 + push r27 + push r30 + push r31 + clr r1
+.endm + +.macro postcall + pop r31 + pop r30 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r1 + pop r0 +.endm + + +.macro hexdump length + push r27 + push r26 + ldi r25, '\r' + mov r24, r25 + call uart_putc + ldi r25, '\n' + mov r24, r25 + call uart_putc + pop r26 + pop r27 + movw r24, r26 +.if \length > 16 + ldi r22, lo8(16) + ldi r23, hi8(16) + push r27 + push r26 + call uart_hexdump + pop r26 + pop r27 + adiw r26, 16 + hexdump \length-16 +.else + ldi r22, lo8(\length) + ldi r23, hi8(\length) + call uart_hexdump +.endif +.endm + +.macro delay +/* + push r0 + push r1 + clr r0 +1: clr r1 +2: dec r1 + brne 2b + dec r0 + brne 1b + pop r1 + pop r0 // */ +.endm + +/* X points to Block */ +.macro dbg_hexdump length +/* + precall + hexdump \length + postcall + // */ +.endm + + + +.section .text + +SPL = 0x3D +SPH = 0x3E +SREG = 0x3F + + +; +;sha1_ctx_t is: +; +; [h0][h1][h2][h3][h4][length] +; hn is 32 bit large, length is 64 bit large + +;########################################################### + +.global sha1_ctx2hash +; === sha1_ctx2hash === +; this function converts a state into a normal hash (bytestring) +; param1: the 16-bit destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to sha1_ctx structure +; given in r23,r22 +sha1_ctx2hash: + movw r26, r22 + movw r30, r24 + ldi r21, 5 + sbiw r26, 4 +1: + ldi r20, 4 + adiw r26, 8 +2: + ld r0, -X + st Z+, r0 + dec r20 + brne 2b + + dec r21 + brne 1b + + ret + +;########################################################### + +.global sha1 +; === sha1 === +; this function calculates SHA-1 hashes from messages in RAM +; param1: the 16-bit hash destination pointer +; given in r25,r24 (r25 is most significant) +; param2: the 16-bit pointer to message +; given in r23,r22 +; param3: 32-bit length value (length of message in bits) +; given in r21,r20,r19,r18 +sha1: +sha1_prolog: + push r8 + push r9 + push r10 + push r11 + push 
r12 + push r13 + push r16 + push r17 + in r16, SPL + in r17, SPH + subi r16, 5*4+8 + sbci r17, 0 + in r0, SREG + cli + out SPL, r16 + out SPH, r17 + out SREG, r0 + + push r25 + push r24 + inc r16 + adc r17, r1 + + movw r8, r18 /* backup of length*/ + movw r10, r20 + + movw r12, r22 /* backup pf msg-ptr */ + + movw r24, r16 + rcall sha1_init + /* if length >= 512 */ +1: + tst r11 + brne 4f + tst r10 + brne 4f + mov r19, r9 + cpi r19, 0x02 + brlo 4f + + movw r24, r16 + movw r22, r12 + rcall sha1_nextBlock + ldi r19, 0x64 + add r22, r19 + adc r23, r1 + /* length -= 512 */ + ldi r19, 0x02 + sub r9, r19 + sbc r10, r1 + sbc r11, r1 + rjmp 1b + +4: + movw r24, r16 + movw r22, r12 + movw r20, r8 + rcall sha1_lastBlock + + pop r24 + pop r25 + movw r22, r16 + rcall sha1_ctx2hash + +sha1_epilog: + in r30, SPL + in r31, SPH + adiw r30, 5*4+8 + in r0, SREG + cli + out SPL, r30 + out SPH, r31 + out SREG, r0 + pop r17 + pop r16 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + ret + +;########################################################### + + +; block MUST NOT be larger than 64 bytes + +.global sha1_lastBlock +; === sha1_lastBlock === +; this function does padding & Co. for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +; param3: an 16-bit integer specifing length of block in bits +; given in r21,r20 +sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1) + + +sha1_lastBlock: + tst r20 + brne sha1_lastBlock_prolog + cpi r21, 0x02 + brne sha1_lastBlock_prolog + push r25 + push r24 + push r23 + push r22 + rcall sha1_nextBlock + pop r22 + pop r23 + pop r24 + pop r25 + clr r21 + clr r22 +sha1_lastBlock_prolog: + /* allocate space on stack */ + in r30, SPL + in r31, SPH + in r1, SREG + subi r30, lo8(64) + sbci r31, hi8(64) /* ??? 
*/ + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + + adiw r30, 1 /* SP points to next free byte on stack */ + mov r18, r20 /* r20 = LSB(length) */ + lsr r18 + lsr r18 + lsr r18 + bst r21, 0 /* may be we should explain this ... */ + bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */ + + + movw r26, r22 /* X points to begin of msg */ + tst r18 + breq sha1_lastBlock_post_copy + mov r1, r18 +sha1_lastBlock_copy_loop: + ld r0, X+ + st Z+, r0 + dec r1 + brne sha1_lastBlock_copy_loop +sha1_lastBlock_post_copy: +sha1_lastBlock_insert_stuffing_bit: + ldi r19, 0x80 + mov r0,r19 + ldi r19, 0x07 + and r19, r20 /* if we are in bitmode */ + breq 2f /* no bitmode */ +1: + lsr r0 + dec r19 + brne 1b + ld r19, X +/* maybe we should do some ANDing here, just for safety */ + or r0, r19 +2: + st Z+, r0 + inc r18 + +/* checking stuff here */ + cpi r18, 64-8+1 + brsh 0f + rjmp sha1_lastBlock_insert_zeros +0: + /* oh shit, we landed here */ + /* first we have to fill it up with zeros */ + ldi r19, 64 + sub r19, r18 + breq 2f +1: + st Z+, r1 + dec r19 + brne 1b +2: + sbiw r30, 63 + sbiw r30, 1 + movw r22, r30 + + push r31 + push r30 + push r25 + push r24 + push r21 + push r20 + rcall sha1_nextBlock + pop r20 + pop r21 + pop r24 + pop r25 + pop r30 + pop r31 + + /* now we should subtract 512 from length */ + movw r26, r24 + adiw r26, 4*5+1 /* we can skip the lowest byte */ + ld r19, X + subi r19, hi8(512) + st X+, r19 + ldi r18, 6 +1: + ld r19, X + sbci r19, 0 + st X+, r19 + dec r18 + brne 1b + +; clr r18 /* not neccessary ;-) */ + /* reset Z pointer to begin of block */ + +sha1_lastBlock_insert_zeros: + ldi r19, 64-8 + sub r19, r18 + breq sha1_lastBlock_insert_length + clr r1 +1: + st Z+, r1 /* r1 is still zero */ + dec r19 + brne 1b + +; rjmp sha1_lastBlock_epilog +sha1_lastBlock_insert_length: + movw r26, r24 /* X points to state */ + adiw r26, 5*4 /* X points to (state.length) */ + adiw r30, 8 /* Z points one after the last byte of block */ + ld r0, X+ + add r0, r20 + st 
-Z, r0 + ld r0, X+ + adc r0, r21 + st -Z, r0 + ldi r19, 6 +1: + ld r0, X+ + adc r0, r1 + st -Z, r0 + dec r19 + brne 1b + + sbiw r30, 64-8 + movw r22, r30 + rcall sha1_nextBlock + +sha1_lastBlock_epilog: + in r30, SPL + in r31, SPH + in r1, SREG + adiw r30, 63 ; lo8(64) + adiw r30, 1 ; hi8(64) + cli + out SPL, r30 + out SPH, r31 + out SREG,r1 + clr r1 + clr r0 + ret + +/**/ +;########################################################### + +.global sha1_nextBlock +; === sha1_nextBlock === +; this is the core function for calculating SHA-1 hashes +; param1: the 16-bit pointer to sha1_ctx structure +; given in r25,r24 (r25 is most significant) +; param2: an 16-bit pointer to 64 byte block to hash +; given in r23,r22 +sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte) + +xtmp = 0 +xNULL = 1 +W1 = 10 +W2 = 11 +T1 = 12 +T2 = 13 +T3 = 14 +T4 = 15 +LoopC = 16 +S = 17 +tmp1 = 18 +tmp2 = 19 +tmp3 = 20 +tmp4 = 21 +F1 = 22 +F2 = 23 +F3 = 24 +F4 = 25 + +/* byteorder: high number <--> high significance */ +sha1_nextBlock: + ; initial, let's make some space ready for local vars + /* replace push & pop by mem ops? */ + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r20, SPL + in r21, SPH + movw r18, r20 ;backup SP +; movw r26, r20 ; X points to free space on stack /* maybe removeable? 
*/ + movw r30, r22 ; Z points to message + subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63 + sbci r21, hi8(sha1_nextBlock_localSpace) + movw r26, r20 ; X points to free space on stack + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + out SPH, r21 + out SREG, r0 + + push r18 + push r19 /* push old SP on new stack */ + push r24 + push r25 /* param1 will be needed later */ + + /* load a[] with state */ + movw 28, r24 /* load pointer to state in Y */ + adiw r26, 1 ; X++ + + ldi LoopC, 5*4 +1: ld tmp1, Y+ + st X+, tmp1 + dec LoopC + brne 1b + + movw W1, r26 /* save pointer to w[0] */ + /* load w[] with endian fixed message */ + /* we might also use the changeendian32() function at bottom */ + movw r30, r22 /* mv param2 (ponter to msg) to Z */ + ldi LoopC, 16 +1: + ldd tmp1, Z+3 + st X+, tmp1 + ldd tmp1, Z+2 + st X+, tmp1 + ldd tmp1, Z+1 + st X+, tmp1 + ld tmp1, Z + st X+, tmp1 + adiw r30, 4 + dec LoopC + brne 1b + + ;clr LoopC /* LoopC is named t in FIPS 180-2 */ + clr xtmp +sha1_nextBlock_mainloop: + mov S, LoopC + lsl S + lsl S + andi S, 0x3C /* S is a bytepointer so *4 */ + /* load w[s] */ + movw r26, W1 + add r26, S /* X points at w[s] */ + adc r27, xNULL + ld T1, X+ + ld T2, X+ + ld T3, X+ + ld T4, X+ + + /**/ + push r26 + push r27 + push T4 + push T3 + push T2 + push T1 + in r26, SPL + in r27, SPH + adiw r26, 1 + dbg_hexdump 4 + pop T1 + pop T2 + pop T3 + pop T4 + pop r27 + pop r26 + /**/ + + cpi LoopC, 16 + brlt sha1_nextBlock_mainloop_core + /* update w[s] */ + ldi tmp1, 2*4 + rcall 1f + ldi tmp1, 8*4 + rcall 1f + ldi tmp1, 13*4 + rcall 1f + rjmp 2f +1: /* this might be "outsourced" to save the jump above */ + add tmp1, S + andi tmp1, 0x3f + movw r26, W1 + add r26, tmp1 + adc r27, xNULL + ld tmp2, X+ + eor T1, tmp2 + ld tmp2, X+ + eor T2, tmp2 + ld tmp2, X+ + eor T3, tmp2 + ld tmp2, X+ + eor T4, tmp2 + ret +2: /* now we just hav to do a ROTL(T) and save T back */ + mov tmp2, T4 + rol tmp2 + rol T1 + rol T2 + 
rol T3 + rol T4 + movw r26, W1 + add r26, S + adc r27, xNULL + st X+, T1 + st X+, T2 + st X+, T3 + st X+, T4 + +sha1_nextBlock_mainloop_core: /* ther core function; T=ROTL5(a) ....*/ + /* T already contains w[s] */ + movw r26, W1 + sbiw r26, 4*1 /* X points at a[4] aka e */ + ld tmp1, X+ + add T1, tmp1 + ld tmp1, X+ + adc T2, tmp1 + ld tmp1, X+ + adc T3, tmp1 + ld tmp1, X+ + adc T4, tmp1 /* T = w[s]+e */ + sbiw r26, 4*5 /* X points at a[0] aka a */ + ld F1, X+ + ld F2, X+ + ld F3, X+ + ld F4, X+ + mov tmp1, F4 /* X points at a[1] aka b */ + ldi tmp2, 5 +1: + rol tmp1 + rol F1 + rol F2 + rol F3 + rol F4 + dec tmp2 + brne 1b + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, F4 /* T = ROTL(a,5) + e + w[s] */ + + /* now we have to do this fucking conditional stuff */ + ldi r30, lo8(sha1_nextBlock_xTable) + ldi r31, hi8(sha1_nextBlock_xTable) + add r30, xtmp + adc r31, xNULL + lpm tmp1, Z + cp tmp1, LoopC + brne 1f + inc xtmp +1: ldi r30, lo8(sha1_nextBlock_KTable) + ldi r31, hi8(sha1_nextBlock_KTable) + lsl xtmp + lsl xtmp + add r30, xtmp + adc r31, xNULL + lsr xtmp + lsr xtmp + + lpm tmp1, Z+ + add T1, tmp1 + lpm tmp1, Z+ + adc T2, tmp1 + lpm tmp1, Z+ + adc T3, tmp1 + lpm tmp1, Z+ + adc T4, tmp1 + /* T = ROTL(a,5) + e + kt + w[s] */ + + /* wo Z-4 gerade auf kt zeigt ... 
*/ + movw r28, r26 /* copy X in Y */ + adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */ + clc + ror r31 + ror r30 + + icall + mov F1, tmp1 + icall + mov F2, tmp1 + icall + mov F3, tmp1 + icall + + add T1, F1 + adc T2, F2 + adc T3, F3 + adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */ + /* X points still at a[1] aka b, Y points at a[2] aka c */ + /* update a[] */ +sha1_nextBlock_update_a: + /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/ + //adiw r28, 3*4 /* Y should point at a[4] aka e */ + movw r28, W1 + sbiw r28, 4 + + ldi tmp2, 4*4 +1: + ld tmp1, -Y + std Y+4, tmp1 + dec tmp2 + brne 1b + /* Y points at a[0] aka a*/ + + movw r28, W1 + sbiw r28, 5*4 + /* store T in a[0] aka a */ + st Y+, T1 + st Y+, T2 + st Y+, T3 + st Y+, T4 + /* Y points at a[1] aka b*/ + + /* rotate c */ + ldd T1, Y+1*4 + ldd T2, Y+1*4+1 + ldd T3, Y+1*4+2 + ldd T4, Y+1*4+3 + mov tmp1, T1 + ldi tmp2, 2 +1: ror tmp1 + ror T4 + ror T3 + ror T2 + ror T1 + dec tmp2 + brne 1b + std Y+1*4+0, T1 + std Y+1*4+1, T2 + std Y+1*4+2, T3 + std Y+1*4+3, T4 + + push r27 + push r26 + movw r26, W1 + sbiw r26, 4*5 + dbg_hexdump 4*5 + pop r26 + pop r27 + + inc LoopC + cpi LoopC, 80 + brge 1f + jmp sha1_nextBlock_mainloop +/**************************************/ +1: + /* littel patch */ + sbiw r28, 4 + +/* add a[] to state and inc length */ + pop r27 + pop r26 /* now X points to state (and Y still at a[0]) */ + ldi tmp4, 5 +1: clc + ldi tmp3, 4 +2: ld tmp1, X + ld tmp2, Y+ + adc tmp1, tmp2 + st X+, tmp1 + dec tmp3 + brne 2b + dec tmp4 + brne 1b + + /* now length += 512 */ + adiw r26, 1 /* we skip the least significant byte */ + ld tmp1, X + ldi tmp2, hi8(512) /* 2 */ + add tmp1, tmp2 + st X+, tmp1 + ldi tmp2, 6 +1: + ld tmp1, X + adc tmp1, xNULL + st X+, tmp1 + dec tmp2 + brne 1b + +; EPILOG +sha1_nextBlock_epilog: +/* now we should clean up the stack */ + pop r21 + pop r20 + in r0, SREG + cli ; we want to be uninterrupted while updating SP + out SPL, r20 + 
out SPH, r21 + out SREG, r0 + + clr r1 + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + ret + +sha1_nextBlock_xTable: +.byte 20,40,60,0 +sha1_nextBlock_KTable: +.int 0x5a827999 +.int 0x6ed9eba1 +.int 0x8f1bbcdc +.int 0xca62c1d6 +sha1_nextBlock_JumpTable: +jmp sha1_nextBlock_Ch +jmp sha1_nextBlock_Parity +jmp sha1_nextBlock_Maj +jmp sha1_nextBlock_Parity + + /* X and Y still point at a[1] aka b ; return value in tmp1 */ +sha1_nextBlock_Ch: + ld tmp1, Y+ + mov tmp2, tmp1 + com tmp2 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp3, Y+7 /* load from d */ + and tmp2, tmp3 + eor tmp1, tmp2 + /** + precall + ldi r24, lo8(ch_str) + ldi r25, hi8(ch_str) + call uart_putstr_P + postcall + /**/ + ret + +sha1_nextBlock_Maj: + ld tmp1, Y+ + mov tmp2, tmp1 + ldd tmp3, Y+3 /* load from c */ + and tmp1, tmp3 + ldd tmp4, Y+7 /* load from d */ + and tmp2, tmp4 + eor tmp1, tmp2 + and tmp3, tmp4 + eor tmp1, tmp3 + /** + precall + ldi r24, lo8(maj_str) + ldi r25, hi8(maj_str) + call uart_putstr_P + postcall + /**/ + ret + +sha1_nextBlock_Parity: + ld tmp1, Y+ + ldd tmp2, Y+3 /* load from c */ + eor tmp1, tmp2 + ldd tmp2, Y+7 /* load from d */ + eor tmp1, tmp2 + + /** + precall + ldi r24, lo8(parity_str) + ldi r25, hi8(parity_str) + call uart_putstr_P + postcall + /**/ + ret +/* +ch_str: .asciz "\r\nCh" +maj_str: .asciz "\r\nMaj" +parity_str: .asciz "\r\nParity" +*/ +;########################################################### + +.global sha1_init +;void sha1_init(sha1_ctx_t *state){ +; DEBUG_S("\r\nSHA1_INIT"); +; state->h[0] = 0x67452301; +; state->h[1] = 0xefcdab89; +; state->h[2] = 0x98badcfe; +; state->h[3] = 0x10325476; +; state->h[4] = 0xc3d2e1f0; +; state->length = 0; +;} +; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram +; modifys: Z(r30,r31), Func1, r22 +sha1_init: + movw r26, r24 ; (24,25) --> (26,27) load X with param1 + ldi r30, lo8((sha1_init_vector)) + ldi r31, hi8((sha1_init_vector)) + ldi 
r22, 5*4 /* bytes to copy */ +sha1_init_vloop: + lpm r23, Z+ + st X+, r23 + dec r22 + brne sha1_init_vloop + ldi r22, 8 + clr r1 /* this should not be needed */ +sha1_init_lloop: + st X+, r1 + dec r22 + brne sha1_init_lloop + ret + +sha1_init_vector: +.int 0x67452301; +.int 0xefcdab89; +.int 0x98badcfe; +.int 0x10325476; +.int 0xc3d2e1f0; +/* +;########################################################### + +.global rotl32 +; === ROTL32 === +; function that rotates a 32 bit word to the left +; param1: the 32-bit word to rotate +; given in r25,r24,r23,r22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotl32: + cpi r20, 8 + brlo bitrotl + mov r21, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + subi r20, 8 + rjmp rotr32 +bitrotl: + clr r21 + clc +bitrotl_loop: + tst r20 + breq fixrotl + rol r22 + rol r23 + rol r24 + rol r25 + rol r21 + dec r20 + rjmp bitrotl_loop +fixrotl: + or r22, r21 + ret + + +;########################################################### + +.global rotr32 +; === ROTR32 === +; function that rotates a 32 bit word to the right +; param1: the 32-bit word to rotate +; given in r25,r24,r23,22 (r25 is most significant) +; param2: an 8-bit value telling how often to rotate +; given in r20 +; modifys: r21, r22 +rotr32: + cpi r20, 8 + brlo bitrotr + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, r21 + subi r20, 8 + rjmp rotr32 +bitrotr: + clr r21 + clc +bitrotr_loop: + tst r20 + breq fixrotr + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + dec r20 + rjmp bitrotr_loop +fixrotr: + or r25, r21 + ret + + +;########################################################### + +.global change_endian32 +; === change_endian32 === +; function that changes the endianess of a 32-bit word +; param1: the 32-bit word +; given in r25,r24,r23,22 (r25 is most significant) +; modifys: r21, r22 +change_endian32: + movw r20, r22 ; (r22,r23) --> (r20,r21) + mov r22, r25 + 
mov r23, r24 + mov r24, r21 + mov r25, r20 + ret +*/
diff --git a/sha1.c b/sha1.c
new file mode 100644
index 0000000..711c51c
--- /dev/null
+++ b/sha1.c
@@ -0,0 +1,258 @@
+/**
+ * \file	sha1.c
+ * \author	Daniel Otte
+ * \date	08.10.2006
+ * \par License:
+ * GPL
+ * \brief SHA-1 implementation.
+ *
+ */
+
+#include <string.h> /* memcpy & co */
+#include <stdint.h>
+#include "config.h"
+#undef DEBUG
+#include "debug.h"
+#include "sha1.h"
+
+/*
+ * Message words, the length field and the digest are read and written byte
+ * by byte in big-endian order, so nothing in this file depends on the byte
+ * order of the host CPU.
+ */
+
+/********************************************************************************************************/
+
+/**
+ * \brief initialises given SHA-1 context
+ *
+ * Loads the five initial chaining values and resets the bit counter.
+ */
+void sha1_init(sha1_ctx_t *state){
+	DEBUG_S("\r\nSHA1_INIT");
+	state->h[0] = 0x67452301;
+	state->h[1] = 0xefcdab89;
+	state->h[2] = 0x98badcfe;
+	state->h[3] = 0x10325476;
+	state->h[4] = 0xc3d2e1f0;
+	state->length = 0;
+}
+
+/********************************************************************************************************/
+/* some helping functions */
+
+/* rotates n left by bits; the count is taken modulo 32 so that no shift
+ * by the full word width (undefined in C) can happen */
+uint32_t rotl32(uint32_t n, uint8_t bits){
+	bits &= 31;
+	return ((n<<bits) | (n>>((32-bits)&31)));
+}
+
+/* reverses the byte order of a 32-bit word */
+uint32_t change_endian32(uint32_t x){
+	return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
+}
+
+
+/* three SHA-1 inner functions */
+uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
+	DEBUG_S("\r\nCH");
+	return ((x&y)^((~x)&z));
+}
+
+uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
+	DEBUG_S("\r\nMAJ");
+	return ((x&y)^(x&z)^(y&z));
+}
+
+uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
+	DEBUG_S("\r\nPARITY");
+	return ((x^y)^z);
+}
+
+/********************************************************************************************************/
+/**
+ * \brief "add" a block to the hash
+ *
+ * This is the core function of the hash algorithm: it consumes exactly one
+ * 512-bit block, updates the chaining values and adds 512 to the bit
+ * counter. To understand how it works and what the variables do, take a
+ * look at FIPS 180-2. This is the "alternative" implementation, which
+ * keeps only 16 words of the message schedule.
+ */
+
+#define MASK 0x0000000f
+
+typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);
+
+void sha1_nextBlock (sha1_ctx_t *state, const void* block){
+	uint32_t a[5];
+	uint32_t w[16];
+	uint32_t temp;
+	uint8_t t,s;
+	const uint8_t *msg = (const uint8_t*)block;
+	pf_t f[] = {ch,parity,maj,parity};
+	uint32_t k[4]={	0x5a827999,
+					0x6ed9eba1,
+					0x8f1bbcdc,
+					0xca62c1d6};
+
+	/* load the w array from the big-endian message words */
+	for(t=0; t<16; ++t){
+		w[t] =  ((uint32_t)msg[4*t  ]<<24) | ((uint32_t)msg[4*t+1]<<16)
+		      | ((uint32_t)msg[4*t+2]<< 8) |  (uint32_t)msg[4*t+3];
+	}
+
+	uint8_t dbgi;
+	for(dbgi=0; dbgi<16; ++dbgi){
+		DEBUG_S("\n\rBlock:");
+		DEBUG_B(dbgi);
+		DEBUG_C(':');
+		#ifdef DEBUG
+			uart_hexdump(&(w[dbgi]) ,4);
+		#endif
+	}
+
+
+	/* load the state */
+	memcpy(a, state->h, 5*sizeof(uint32_t));
+
+
+	/* the fun stuff */
+	for(t=0; t<=79; ++t){
+		s = t & MASK;
+		if(t>=16){
+			#ifdef DEBUG
+			 DEBUG_S("\r\n ws = "); uart_hexdump(&(w[s]), 4);
+			#endif
+			w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^
+				 w[(s+ 2)&MASK] ^ w[s] ,1);
+			#ifdef DEBUG
+			 DEBUG_S(" --> ws = "); uart_hexdump(&(w[s]), 4);
+			#endif
+		}
+
+		uint32_t dtemp;
+		temp = rotl32(a[0],5) + (dtemp=f[t/20](a[1],a[2],a[3])) + a[4] + k[t/20] + w[s];
+		(void)dtemp; /* only read by the debug dump below */
+		memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
+		a[0] = temp;
+		a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
+
+		/* debug dump */
+		DEBUG_S("\r\nt = "); DEBUG_B(t);
+		DEBUG_S("; a[]: ");
+		#ifdef DEBUG
+		 uart_hexdump(a, 5*4);
+		#endif
+		DEBUG_S("; k = ");
+		#ifdef DEBUG
+		 uart_hexdump(&(k[t/20]), 4);
+		#endif
+		DEBUG_S("; f(b,c,d) = ");
+		#ifdef DEBUG
+		 uart_hexdump(&dtemp, 4);
+		#endif
+	}
+
+	/* update the state */
+	for(t=0; t<5; ++t){
+		state->h[t] += a[t];
+	}
+	state->length += 512;
+}
+
+/********************************************************************************************************/
+
+/**
+ * \brief hash the final, possibly partial, block and apply the padding
+ *
+ * \param state  context to finish
+ * \param block  remaining message data
+ * \param length number of valid bits in block; complete 512-bit blocks are
+ *               hashed first, so 512 and more is accepted as well
+ */
+void sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length){
+	uint8_t lb[SHA1_BLOCK_BITS/8]; /* local block */
+	const uint8_t *msg = (const uint8_t*)block;
+	uint8_t bytes, bits, i;
+
+	/* hash complete blocks first so that the rest always fits into lb */
+	while(length >= SHA1_BLOCK_BITS){
+		sha1_nextBlock(state, msg);
+		msg += SHA1_BLOCK_BITS/8;
+		length -= SHA1_BLOCK_BITS;
+	}
+	state->length += length;
+	bytes = (uint8_t)(length>>3); /* complete bytes, at most 63 */
+	bits  = (uint8_t)(length & 0x07); /* bits in the trailing partial byte */
+	memcpy (lb, msg, bytes);
+
+	/* set the final one bit */
+	if (bits){ /* keep only the valid high bits of the partial byte */
+		lb[bytes] = msg[bytes] & (uint8_t)(0xff00>>bits);
+	} else {
+		lb[bytes] = 0;
+	}
+	lb[bytes] |= 0x80>>bits;
+	++bytes; /* from now on bytes is the number of BYTES used in lb */
+	/* pad with zeros */
+	if (bytes>64-8){ /* not enough space for 64bit length value */
+		memset(&(lb[bytes]), 0, 64-bytes);
+		sha1_nextBlock(state, lb);
+		state->length -= 512; /* the padding block is not message data */
+		bytes = 0;
+	}
+	memset(&(lb[bytes]), 0, 56-bytes);
+	/* store the 64bit length value, most significant byte first */
+	for (i=1; i<=8; ++i){
+		lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
+	}
+	sha1_nextBlock(state, lb);
+}
+
+/********************************************************************************************************/
+
+/**
+ * \brief convert a context into the 20 byte digest
+ *
+ * Every chaining value is written most significant byte first.
+ */
+void sha1_ctx2hash (sha1_hash_t *dest, const sha1_ctx_t *state){
+	uint8_t *out = *dest;
+	uint8_t i;
+	/* h[] holds exactly 5 words (20 bytes); iterating any further would
+	 * run past both the state and the digest buffer */
+	for(i=0; i<5; ++i){
+		uint32_t v = state->h[i];
+		*out++ = (uint8_t)(v>>24);
+		*out++ = (uint8_t)(v>>16);
+		*out++ = (uint8_t)(v>> 8);
+		*out++ = (uint8_t)(v    );
+	}
+}
+
+/********************************************************************************************************/
+/**
+ * \brief hash a complete message that is held in memory
+ *
+ * \param dest   receives the 20 byte digest
+ * \param msg    message data
+ * \param length message length in bits
+ */
+void sha1 (sha1_hash_t *dest, const void* msg, uint32_t length){
+	sha1_ctx_t s;
+	const uint8_t *p = (const uint8_t*)msg;
+	DEBUG_S("\r\nBLA BLUB");
+	sha1_init(&s);
+	while(length >= SHA1_BLOCK_BITS){
+		DEBUG_S("\r\none block");
+		sha1_nextBlock(&s, p);
+		p += SHA1_BLOCK_BITS/8; /* increment pointer to next block */
+		length -= SHA1_BLOCK_BITS;
+	}
+	sha1_lastBlock(&s, p, (uint16_t)length);
+	sha1_ctx2hash(dest, &s);
+}
+
+
diff --git a/sha1.h b/sha1.h
new file mode 100644
index 0000000..4f54d88
--- /dev/null
+++ b/sha1.h
@@ -0,0 +1,48 @@
+/**
+ * \file	sha1.h
+ * \author	Daniel Otte
+ * \date	08.10.2006
+ * \par License:
+ * GPL
+ * \brief SHA-1 declaration.
+ *
+ */
+
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+
+
+#define SHA1_HASH_BITS  160
+#define SHA1_BLOCK_BITS 512
+
+/**
+ * \brief SHA-1 context type
+ *
+ * h holds the five chaining values, length the number of message bits
+ * hashed so far.
+ */
+typedef struct {
+	uint32_t h[5];
+	uint64_t length;
+} sha1_ctx_t;
+
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+void sha1_init(sha1_ctx_t *state);
+
+/* block points to exactly SHA1_BLOCK_BITS bits of message data */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+/* length is the number of valid bits in block */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length);
+
+void sha1_ctx2hash (sha1_hash_t *dest, const sha1_ctx_t *state);
+/* length is the message length in bits */
+void sha1 (sha1_hash_t *dest, const void* msg, uint32_t length);
+//uint32_t change_endian32(uint32_t x);
+
+
+
+
+#endif /*SHA1_H_*/
diff --git a/sha256-asm.S b/sha256-asm.S index 0769e7c..0629b2d 100644 --- a/sha256-asm.S +++ b/sha256-asm.S @@ -96,8 +96,8 @@ SREG = 0x3F ;########################################################### .global sha256_ctx2hash -; === sha256_lastBlock === -; this function does padding & Co.
for calculating SHA-256 hashes +; === sha256_ctx2hash === +; this function converts a state into a normal hash (bytestring) ; param1: the 16-bit destination pointer ; given in r25,r24 (r25 is most significant) ; param2: the 16-bit pointer to sha256_ctx structure @@ -840,13 +840,13 @@ update_state_loop: add r20, r21 st Z+, r20 clr r21 -sha256_nexBlock_fix_length: +sha256_nextBlock_fix_length: brcc sha256_nextBlock_epilog ld r20, Z adc r20, r21 st Z+, r20 dec r22 - brne sha256_nexBlock_fix_length + brne sha256_nextBlock_fix_length ; EPILOG sha256_nextBlock_epilog: diff --git a/sha256.c b/sha256.c index fea9340..30ab0a5 100644 --- a/sha256.c +++ b/sha256.c @@ -180,7 +180,7 @@ void sha256_lastBlock(sha256_ctx_t *state, void* block, uint16_t length){ /* * length in bits! */ -void sha256(sha256_hash_t *dest, void* msg, uint32_t length){ /* length could be choosen longer but this is for ?C */ +void sha256(sha256_hash_t *dest, void* msg, uint32_t length){ /* length could be choosen longer but this is for µC */ sha256_ctx_t s; sha256_init(&s); while(length >= SHA256_BLOCK_BITS){ diff --git a/uart.c b/uart.c index c0365e7..45338b5 100644 --- a/uart.c +++ b/uart.c @@ -19,7 +19,7 @@ #endif -#define UART_BAUD_CALC(UART_BAUD_RATE,F_OSC) ((F_CPU)/((UART_BAUD_RATE)*16L)-1) +#define UART_BAUD_CALC(UART_BAUD_RATE,F_OSC) ((F_OSC)/((UART_BAUD_RATE)*16L)-1) #ifdef UART_INTERRUPT