--- /dev/null
+/*\r
+ * Author: Daniel Otte\r
+ *\r
+ * License: GPL\r
+*/\r
+; SHA1 implementation in assembler for AVR\r
+SHA1_BLOCK_BITS = 512\r
+SHA1_HASH_BITS = 160\r
+\r
+.macro precall\r
+ /* push r18 - r27, r30 - r31*/\r
+ push r0\r
+ push r1\r
+ push r18\r
+ push r19\r
+ push r20\r
+ push r21\r
+ push r22\r
+ push r23\r
+ push r24\r
+ push r25\r
+ push r26\r
+ push r27\r
+ push r30\r
+ push r31\r
+ clr r1\r
+.endm\r
+\r
+.macro postcall\r
+ pop r31\r
+ pop r30\r
+ pop r27\r
+ pop r26\r
+ pop r25\r
+ pop r24\r
+ pop r23\r
+ pop r22\r
+ pop r21\r
+ pop r20\r
+ pop r19\r
+ pop r18\r
+ pop r1\r
+ pop r0\r
+.endm\r
+\r
+\r
+.macro hexdump length\r
+ push r27\r
+ push r26\r
+ ldi r25, '\r'\r
+ mov r24, r25\r
+ call uart_putc\r
+ ldi r25, '\n'\r
+ mov r24, r25\r
+ call uart_putc\r
+ pop r26\r
+ pop r27\r
+ movw r24, r26\r
+.if \length > 16\r
+ ldi r22, lo8(16)\r
+ ldi r23, hi8(16)\r
+ push r27\r
+ push r26\r
+ call uart_hexdump\r
+ pop r26\r
+ pop r27\r
+ adiw r26, 16\r
+ hexdump \length-16\r
+.else\r
+ ldi r22, lo8(\length)\r
+ ldi r23, hi8(\length)\r
+ call uart_hexdump\r
+.endif\r
+.endm\r
+\r
+.macro delay\r
+/* \r
+ push r0\r
+ push r1\r
+ clr r0\r
+1: clr r1\r
+2: dec r1\r
+ brne 2b\r
+ dec r0\r
+ brne 1b\r
+ pop r1\r
+ pop r0 // */\r
+.endm\r
+\r
+/* X points to Block */\r
+.macro dbg_hexdump length\r
+/* \r
+ precall\r
+ hexdump \length\r
+ postcall\r
+ // */\r
+.endm\r
+\r
+\r
+\r
+.section .text\r
+\r
+SPL = 0x3D\r
+SPH = 0x3E\r
+SREG = 0x3F\r
+\r
+\r
+;\r
+;sha1_ctx_t is:\r
+;\r
+; [h0][h1][h2][h3][h4][length]\r
+; each hn is a 32-bit word, length is a 64-bit counter of the message bits processed so far\r
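+;\r
+; for reference, a rough C sketch of the same layout (the authoritative\r
+; typedef lives in sha1.h and is what sha1.c below uses):\r
+;   typedef struct { uint32_t h[5]; uint64_t length; } sha1_ctx_t; /* 28 bytes */\r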
+\r
+;########################################################### \r
+\r
+.global sha1_ctx2hash\r
+; === sha1_ctx2hash ===\r
+; this function converts the internal state into the final hash value (a byte string)\r
+; param1: the 16-bit destination pointer\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: the 16-bit pointer to sha1_ctx structure\r
+; given in r23,r22\r
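+; note: the five state words are stored little-endian in RAM; the nested\r
+; loops below emit each word byte-reversed, so the resulting hash bytes are\r
+; in the usual big-endian SHA-1 order\r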
+sha1_ctx2hash:\r
+ movw r26, r22\r
+ movw r30, r24\r
+ ldi r21, 5\r
+ sbiw r26, 4\r
+1: \r
+ ldi r20, 4\r
+ adiw r26, 8\r
+2: \r
+ ld r0, -X\r
+ st Z+, r0 \r
+ dec r20\r
+ brne 2b\r
+ \r
+ dec r21\r
+ brne 1b\r
+ \r
+ ret\r
+\r
+;########################################################### \r
+\r
+.global sha1\r
+; === sha1 ===\r
+; this function calculates SHA-1 hashes from messages in RAM\r
+; param1: the 16-bit hash destination pointer\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: the 16-bit pointer to message\r
+; given in r23,r22\r
+; param3: 32-bit length value (length of message in bits)\r
+; given in r21,r20,r19,r18\r
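+;\r
+; rough usage sketch from C (the exact types are declared in sha1.h; the\r
+; destination must provide SHA1_HASH_BITS/8 = 20 bytes, and the length is\r
+; passed in bits):\r
+;   uint8_t digest[20];\r
+;   sha1((sha1_hash_t*)digest, "abc", 3*8);\r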
+sha1:\r
+sha1_prolog:\r
+ push r8\r
+ push r9\r
+ push r10\r
+ push r11\r
+ push r12\r
+ push r13\r
+ push r16\r
+ push r17\r
+ in r16, SPL\r
+ in r17, SPH\r
+ subi r16, 5*4+8 \r
+ sbci r17, 0 \r
+ in r0, SREG\r
+ cli\r
+ out SPL, r16\r
+ out SPH, r17\r
+ out SREG, r0\r
+ \r
+ push r25\r
+ push r24\r
+ subi r16, lo8(-1) /* ctx pointer = SP+1; subi/sbci with -1 adds one with proper carry (inc would not set C) */\r
+ sbci r17, hi8(-1)\r
+ \r
+ movw r8, r18 /* backup of length*/\r
+ movw r10, r20\r
+ \r
+ movw r12, r22 /* backup of msg-ptr */\r
+ \r
+ movw r24, r16\r
+ rcall sha1_init\r
+ /* if length >= 512 */\r
+1:\r
+ tst r11\r
+ brne 4f\r
+ tst r10\r
+ brne 4f\r
+ mov r19, r9\r
+ cpi r19, 0x02\r
+ brlo 4f\r
+ \r
+ movw r24, r16\r
+ movw r22, r12\r
+ rcall sha1_nextBlock\r
+ ldi r19, 64 /* advance the saved message pointer by 512/8 = 64 bytes */\r
+ add r12, r19\r
+ adc r13, r1\r
+ /* length -= 512 */\r
+ ldi r19, 0x02\r
+ sub r9, r19\r
+ sbc r10, r1\r
+ sbc r11, r1\r
+ rjmp 1b\r
+ \r
+4:\r
+ movw r24, r16\r
+ movw r22, r12\r
+ movw r20, r8\r
+ rcall sha1_lastBlock\r
+ \r
+ pop r24\r
+ pop r25\r
+ movw r22, r16\r
+ rcall sha1_ctx2hash \r
+ \r
+sha1_epilog:\r
+ in r30, SPL\r
+ in r31, SPH\r
+ adiw r30, 5*4+8 \r
+ in r0, SREG\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG, r0\r
+ pop r17\r
+ pop r16\r
+ pop r13\r
+ pop r12\r
+ pop r11\r
+ pop r10\r
+ pop r9\r
+ pop r8\r
+ ret\r
+\r
+;########################################################### \r
+\r
+\r
+; block MUST NOT be larger than 64 bytes\r
+\r
+.global sha1_lastBlock\r
+; === sha1_lastBlock ===\r
+; this function handles padding and the final block(s) of a SHA-1 computation\r
+; param1: the 16-bit pointer to sha1_ctx structure\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: a 16-bit pointer to the 64-byte block to hash\r
+; given in r23,r22\r
+; param3: a 16-bit integer specifying the length of the block in bits\r
+; given in r21,r20\r
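+;\r
+; padding (as in FIPS 180-2): a single 1 bit is appended to the message,\r
+; then zero bits until the length is congruent to 448 mod 512, then the\r
+; total message length as a 64-bit big-endian value; if the length field\r
+; does not fit into this block, an additional block is hashed\r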
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)\r
+\r
+\r
+sha1_lastBlock:\r
+ tst r20\r
+ brne sha1_lastBlock_prolog\r
+ cpi r21, 0x02\r
+ brne sha1_lastBlock_prolog\r
+ push r25\r
+ push r24\r
+ push r23\r
+ push r22\r
+ rcall sha1_nextBlock\r
+ pop r22\r
+ pop r23\r
+ pop r24\r
+ pop r25\r
+ clr r21\r
+ clr r22\r
+sha1_lastBlock_prolog:\r
+ /* allocate space on stack */\r
+ in r30, SPL\r
+ in r31, SPH\r
+ in r1, SREG\r
+ subi r30, lo8(64)\r
+ sbci r31, hi8(64) /* hi8(64) == 0, so this just propagates the borrow */\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG,r1\r
+\r
+ adiw r30, 1 /* SP points to next free byte on stack */\r
+ mov r18, r20 /* r20 = LSB(length) */\r
+ lsr r18\r
+ lsr r18\r
+ lsr r18\r
+ bst r21, 0 /* bit 8 of the bit count (value 256) ... */\r
+ bld r18, 5 /* ... becomes bit 5 (value 32) of the byte count; now r18 == length/8 (aka. length in bytes) */\r
+ \r
+ \r
+ movw r26, r22 /* X points to begin of msg */\r
+ tst r18\r
+ breq sha1_lastBlock_post_copy\r
+ mov r1, r18\r
+sha1_lastBlock_copy_loop:\r
+ ld r0, X+\r
+ st Z+, r0\r
+ dec r1\r
+ brne sha1_lastBlock_copy_loop\r
+sha1_lastBlock_post_copy: \r
+sha1_lastBlock_insert_stuffing_bit: \r
+ ldi r19, 0x80\r
+ mov r0,r19 \r
+ ldi r19, 0x07\r
+ and r19, r20 /* if we are in bitmode */\r
+ breq 2f /* no bitmode */\r
+1: \r
+ lsr r0\r
+ dec r19\r
+ brne 1b\r
+ ld r19, X\r
+/* note: the bits below the stop bit are message garbage; strictly they should be masked off before the OR */\r
+ or r0, r19\r
+2: \r
+ st Z+, r0\r
+ inc r18\r
+\r
+/* check whether the 8-byte length field still fits into this block */\r
+ cpi r18, 64-8+1\r
+ brsh 0f \r
+ rjmp sha1_lastBlock_insert_zeros\r
+0:\r
+ /* it does not fit: zero-fill the rest of this block, hash it, and put\r
+ the length into an additional block */\r
+ ldi r19, 64\r
+ sub r19, r18\r
+ breq 2f\r
+1: \r
+ st Z+, r1\r
+ dec r19\r
+ brne 1b \r
+2: \r
+ sbiw r30, 63\r
+ sbiw r30, 1\r
+ movw r22, r30\r
+ \r
+ push r31\r
+ push r30\r
+ push r25\r
+ push r24\r
+ push r21\r
+ push r20\r
+ rcall sha1_nextBlock\r
+ pop r20\r
+ pop r21\r
+ pop r24\r
+ pop r25\r
+ pop r30\r
+ pop r31\r
+ \r
+ /* compensate: the extra sha1_nextBlock call above added 512 to state.length */\r
+ movw r26, r24\r
+ adiw r26, 4*5+1 /* we can skip the lowest byte */\r
+ ld r19, X\r
+ subi r19, hi8(512)\r
+ st X+, r19\r
+ ldi r18, 6\r
+1:\r
+ ld r19, X\r
+ sbci r19, 0\r
+ st X+, r19\r
+ dec r18\r
+ brne 1b\r
+ \r
+; clr r18 /* not necessary: r18 ended up as 0 in the loop above ;-) */\r
+ /* Z already points to the begin of the block again (restored by the pops above) */\r
+\r
+sha1_lastBlock_insert_zeros: \r
+ ldi r19, 64-8\r
+ sub r19, r18\r
+ breq sha1_lastBlock_insert_length\r
+ clr r1\r
+1:\r
+ st Z+, r1 /* r1 is still zero */\r
+ dec r19\r
+ brne 1b\r
+\r
+; rjmp sha1_lastBlock_epilog\r
+sha1_lastBlock_insert_length:\r
+ movw r26, r24 /* X points to state */\r
+ adiw r26, 5*4 /* X points to (state.length) */\r
+ adiw r30, 8 /* Z points one after the last byte of block */\r
+ ld r0, X+\r
+ add r0, r20\r
+ st -Z, r0\r
+ ld r0, X+\r
+ adc r0, r21\r
+ st -Z, r0\r
+ ldi r19, 6\r
+1:\r
+ ld r0, X+\r
+ adc r0, r1\r
+ st -Z, r0\r
+ dec r19\r
+ brne 1b\r
+\r
+ sbiw r30, 64-8\r
+ movw r22, r30\r
+ rcall sha1_nextBlock\r
+\r
+sha1_lastBlock_epilog:\r
+ in r30, SPL\r
+ in r31, SPH\r
+ in r1, SREG\r
+ adiw r30, 63 ; lo8(64)\r
+ adiw r30, 1 ; hi8(64)\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG,r1\r
+ clr r1\r
+ clr r0\r
+ ret\r
+\r
+/**/\r
+;########################################################### \r
+\r
+.global sha1_nextBlock\r
+; === sha1_nextBlock ===\r
+; this is the core function for calculating SHA-1 hashes\r
+; param1: the 16-bit pointer to sha1_ctx structure\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: a 16-bit pointer to the 64-byte block to hash\r
+; given in r23,r22\r
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit words for the w[] array and 5 for the a[] working variables (the define reserves one extra word, 88 bytes total)\r
+\r
+xtmp = 0\r
+xNULL = 1\r
+W1 = 10\r
+W2 = 11\r
+T1 = 12\r
+T2 = 13\r
+T3 = 14\r
+T4 = 15\r
+LoopC = 16\r
+S = 17\r
+tmp1 = 18\r
+tmp2 = 19\r
+tmp3 = 20\r
+tmp4 = 21\r
+F1 = 22\r
+F2 = 23\r
+F3 = 24\r
+F4 = 25\r
+\r
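+/* message schedule: only a 16-word circular buffer is kept; for t >= 16 the\r
+ code below computes, in place,\r
+ w[s] = ROTL1( w[(s+2) mod 16] ^ w[(s+8) mod 16] ^ w[(s+13) mod 16] ^ w[s] )\r
+ with s = t mod 16 (the "alternate method" of FIPS 180-2, same as sha1.c) */\r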
+/* byteorder: high number <--> high significance */\r
+sha1_nextBlock:\r
+ ; prolog: make room for the local variables\r
+ /* replace push & pop by mem ops? */\r
+ push r10\r
+ push r11\r
+ push r12\r
+ push r13\r
+ push r14\r
+ push r15\r
+ push r16\r
+ push r17\r
+ push r28\r
+ push r29\r
+ in r20, SPL\r
+ in r21, SPH\r
+ movw r18, r20 ;backup SP\r
+; movw r26, r20 ; X points to free space on stack /* maybe removeable? */ \r
+ movw r30, r22 ; Z points to message\r
+ subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63\r
+ sbci r21, hi8(sha1_nextBlock_localSpace)\r
+ movw r26, r20 ; X points to free space on stack \r
+ in r0, SREG\r
+ cli ; we want to be uninterrupted while updating SP\r
+ out SPL, r20\r
+ out SPH, r21\r
+ out SREG, r0\r
+ \r
+ push r18\r
+ push r19 /* push old SP on new stack */\r
+ push r24\r
+ push r25 /* param1 will be needed later */\r
+ \r
+ /* load a[] with state */\r
+ movw r28, r24 /* load pointer to state in Y */\r
+ adiw r26, 1 ; X++\r
+\r
+ ldi LoopC, 5*4 \r
+1: ld tmp1, Y+\r
+ st X+, tmp1\r
+ dec LoopC\r
+ brne 1b\r
+\r
+ movw W1, r26 /* save pointer to w[0] */\r
+ /* load w[] with endian fixed message */\r
+ /* we might also use the changeendian32() function at bottom */\r
+ movw r30, r22 /* move param2 (pointer to msg) to Z */\r
+ ldi LoopC, 16\r
+1:\r
+ ldd tmp1, Z+3\r
+ st X+, tmp1\r
+ ldd tmp1, Z+2\r
+ st X+, tmp1\r
+ ldd tmp1, Z+1\r
+ st X+, tmp1\r
+ ld tmp1, Z\r
+ st X+, tmp1\r
+ adiw r30, 4\r
+ dec LoopC\r
+ brne 1b\r
+ \r
+ ;clr LoopC /* LoopC is already 0 after the copy loop above; it is the round counter t of FIPS 180-2 */\r
+ clr xtmp\r
+sha1_nextBlock_mainloop:\r
+ mov S, LoopC\r
+ lsl S\r
+ lsl S\r
+ andi S, 0x3C /* S = (t mod 16)*4, a byte offset into w[] */\r
+ /* load w[s] */\r
+ movw r26, W1\r
+ add r26, S /* X points at w[s] */\r
+ adc r27, xNULL\r
+ ld T1, X+\r
+ ld T2, X+\r
+ ld T3, X+\r
+ ld T4, X+\r
+\r
+ /**/\r
+ push r26\r
+ push r27\r
+ push T4\r
+ push T3\r
+ push T2\r
+ push T1\r
+ in r26, SPL\r
+ in r27, SPH\r
+ adiw r26, 1\r
+ dbg_hexdump 4\r
+ pop T1\r
+ pop T2\r
+ pop T3\r
+ pop T4\r
+ pop r27\r
+ pop r26\r
+ /**/\r
+\r
+ cpi LoopC, 16\r
+ brlt sha1_nextBlock_mainloop_core\r
+ /* update w[s] */\r
+ ldi tmp1, 2*4\r
+ rcall 1f\r
+ ldi tmp1, 8*4\r
+ rcall 1f\r
+ ldi tmp1, 13*4\r
+ rcall 1f\r
+ rjmp 2f\r
+1: /* this might be "outsourced" to save the jump above */\r
+ add tmp1, S\r
+ andi tmp1, 0x3f\r
+ movw r26, W1\r
+ add r26, tmp1\r
+ adc r27, xNULL\r
+ ld tmp2, X+\r
+ eor T1, tmp2\r
+ ld tmp2, X+\r
+ eor T2, tmp2\r
+ ld tmp2, X+\r
+ eor T3, tmp2\r
+ ld tmp2, X+\r
+ eor T4, tmp2\r
+ ret\r
+2: /* now we just have to do a ROTL1(T) and store T back */\r
+ mov tmp2, T4\r
+ rol tmp2\r
+ rol T1\r
+ rol T2\r
+ rol T3\r
+ rol T4\r
+ movw r26, W1\r
+ add r26, S\r
+ adc r27, xNULL\r
+ st X+, T1\r
+ st X+, T2\r
+ st X+, T3\r
+ st X+, T4\r
+ \r
+sha1_nextBlock_mainloop_core: /* the core round: T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */\r
+ /* T already contains w[s] */\r
+ movw r26, W1\r
+ sbiw r26, 4*1 /* X points at a[4] aka e */\r
+ ld tmp1, X+ \r
+ add T1, tmp1\r
+ ld tmp1, X+ \r
+ adc T2, tmp1\r
+ ld tmp1, X+ \r
+ adc T3, tmp1\r
+ ld tmp1, X+ \r
+ adc T4, tmp1 /* T = w[s]+e */\r
+ sbiw r26, 4*5 /* X points at a[0] aka a */\r
+ ld F1, X+ \r
+ ld F2, X+ \r
+ ld F3, X+ \r
+ ld F4, X+ \r
+ mov tmp1, F4 /* X points at a[1] aka b */\r
+ ldi tmp2, 5\r
+1:\r
+ rol tmp1\r
+ rol F1\r
+ rol F2\r
+ rol F3\r
+ rol F4\r
+ dec tmp2\r
+ brne 1b\r
+ \r
+ add T1, F1\r
+ adc T2, F2\r
+ adc T3, F3\r
+ adc T4, F4 /* T = ROTL(a,5) + e + w[s] */\r
+ \r
+ /* select the round-dependent constant k_t and function f_t: xtmp holds the\r
+ current 20-round stage (0..3), xTable the stage boundaries, KTable the\r
+ constants and JumpTable the f_t routines */\r
+ ldi r30, lo8(sha1_nextBlock_xTable)\r
+ ldi r31, hi8(sha1_nextBlock_xTable)\r
+ add r30, xtmp\r
+ adc r31, xNULL\r
+ lpm tmp1, Z\r
+ cp tmp1, LoopC\r
+ brne 1f\r
+ inc xtmp\r
+1: ldi r30, lo8(sha1_nextBlock_KTable)\r
+ ldi r31, hi8(sha1_nextBlock_KTable)\r
+ lsl xtmp\r
+ lsl xtmp\r
+ add r30, xtmp\r
+ adc r31, xNULL\r
+ lsr xtmp\r
+ lsr xtmp\r
+ \r
+ lpm tmp1, Z+\r
+ add T1, tmp1\r
+ lpm tmp1, Z+\r
+ adc T2, tmp1\r
+ lpm tmp1, Z+\r
+ adc T3, tmp1\r
+ lpm tmp1, Z+\r
+ adc T4, tmp1\r
+ /* T = ROTL(a,5) + e + kt + w[s] */\r
+ \r
+ /* at this point Z-4 points at k_t */\r
+ movw r28, r26 /* copy X in Y */\r
+ adiw r30, 3*4 /* now Z points at the right entry of the jump table (byte address) */\r
+ clc /* convert the byte address into a word address for icall */\r
+ ror r31\r
+ ror r30\r
+ \r
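+ /* f_t is evaluated bytewise: each icall below runs the selected routine on\r
+ one byte of b, c and d (the routines advance Y) and returns its result\r
+ in tmp1 */\r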
+ icall\r
+ mov F1, tmp1\r
+ icall\r
+ mov F2, tmp1\r
+ icall\r
+ mov F3, tmp1\r
+ icall\r
+ \r
+ add T1, F1\r
+ adc T2, F2\r
+ adc T3, F3\r
+ adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */\r
+ /* X points still at a[1] aka b, Y points at a[2] aka c */ \r
+ /* update a[] */\r
+sha1_nextBlock_update_a:\r
+ /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/\r
+ //adiw r28, 3*4 /* Y should point at a[4] aka e */\r
+ movw r28, W1\r
+ sbiw r28, 4\r
+ \r
+ ldi tmp2, 4*4 \r
+1: \r
+ ld tmp1, -Y\r
+ std Y+4, tmp1\r
+ dec tmp2\r
+ brne 1b\r
+ /* Y points at a[0] aka a*/\r
+ \r
+ movw r28, W1\r
+ sbiw r28, 5*4\r
+ /* store T in a[0] aka a */\r
+ st Y+, T1\r
+ st Y+, T2\r
+ st Y+, T3\r
+ st Y+, T4\r
+ /* Y points at a[1] aka b*/\r
+ \r
+ /* rotate c: c = ROTL30(b), implemented as two ROTR1 steps */\r
+ ldd T1, Y+1*4\r
+ ldd T2, Y+1*4+1\r
+ ldd T3, Y+1*4+2\r
+ ldd T4, Y+1*4+3\r
+ mov tmp1, T1\r
+ ldi tmp2, 2\r
+1: ror tmp1\r
+ ror T4\r
+ ror T3\r
+ ror T2\r
+ ror T1\r
+ dec tmp2\r
+ brne 1b\r
+ std Y+1*4+0, T1\r
+ std Y+1*4+1, T2\r
+ std Y+1*4+2, T3\r
+ std Y+1*4+3, T4\r
+ \r
+ push r27\r
+ push r26\r
+ movw r26, W1\r
+ sbiw r26, 4*5\r
+ dbg_hexdump 4*5\r
+ pop r26\r
+ pop r27\r
+ \r
+ inc LoopC\r
+ cpi LoopC, 80\r
+ brge 1f\r
+ jmp sha1_nextBlock_mainloop\r
+/**************************************/\r
+1: \r
+ /* small fix-up: Y was left at a[1], step back to a[0] */\r
+ sbiw r28, 4\r
+\r
+/* add a[] to state and inc length */ \r
+ pop r27\r
+ pop r26 /* now X points to state (and Y still at a[0]) */\r
+ ldi tmp4, 5\r
+1: clc\r
+ ldi tmp3, 4\r
+2: ld tmp1, X\r
+ ld tmp2, Y+\r
+ adc tmp1, tmp2\r
+ st X+, tmp1\r
+ dec tmp3\r
+ brne 2b\r
+ dec tmp4\r
+ brne 1b\r
+ \r
+ /* now length += 512 */\r
+ adiw r26, 1 /* we skip the least significant byte */\r
+ ld tmp1, X\r
+ ldi tmp2, hi8(512) /* 2 */\r
+ add tmp1, tmp2\r
+ st X+, tmp1\r
+ ldi tmp2, 6\r
+1:\r
+ ld tmp1, X\r
+ adc tmp1, xNULL\r
+ st X+, tmp1\r
+ dec tmp2\r
+ brne 1b\r
+ \r
+; EPILOG\r
+sha1_nextBlock_epilog:\r
+/* now we should clean up the stack */\r
+ pop r21\r
+ pop r20\r
+ in r0, SREG\r
+ cli ; we want to be uninterrupted while updating SP\r
+ out SPL, r20\r
+ out SPH, r21\r
+ out SREG, r0\r
+ \r
+ clr r1\r
+ pop r29\r
+ pop r28\r
+ pop r17\r
+ pop r16\r
+ pop r15\r
+ pop r14\r
+ pop r13\r
+ pop r12\r
+ pop r11\r
+ pop r10\r
+ ret\r
+\r
+sha1_nextBlock_xTable:\r
+.byte 20,40,60,0\r
+sha1_nextBlock_KTable:\r
+.int 0x5a827999 \r
+.int 0x6ed9eba1 \r
+.int 0x8f1bbcdc \r
+.int 0xca62c1d6\r
+sha1_nextBlock_JumpTable:\r
+jmp sha1_nextBlock_Ch \r
+jmp sha1_nextBlock_Parity\r
+jmp sha1_nextBlock_Maj\r
+jmp sha1_nextBlock_Parity\r
+\r
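+/* the three routines below are bytewise versions of Ch, Maj and Parity\r
+ (cf. ch/maj/parity in sha1.c); since they are purely bitwise they can be\r
+ applied to one byte of b, c and d at a time, four calls per round */\r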
+ /* X and Y still point at a[1] aka b ; return value in tmp1 */\r
+sha1_nextBlock_Ch:\r
+ ld tmp1, Y+\r
+ mov tmp2, tmp1\r
+ com tmp2\r
+ ldd tmp3, Y+3 /* load from c */\r
+ and tmp1, tmp3\r
+ ldd tmp3, Y+7 /* load from d */\r
+ and tmp2, tmp3\r
+ eor tmp1, tmp2\r
+ /**\r
+ precall\r
+ ldi r24, lo8(ch_str)\r
+ ldi r25, hi8(ch_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+ \r
+sha1_nextBlock_Maj:\r
+ ld tmp1, Y+\r
+ mov tmp2, tmp1\r
+ ldd tmp3, Y+3 /* load from c */\r
+ and tmp1, tmp3\r
+ ldd tmp4, Y+7 /* load from d */\r
+ and tmp2, tmp4\r
+ eor tmp1, tmp2\r
+ and tmp3, tmp4\r
+ eor tmp1, tmp3\r
+ /**\r
+ precall\r
+ ldi r24, lo8(maj_str)\r
+ ldi r25, hi8(maj_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+\r
+sha1_nextBlock_Parity:\r
+ ld tmp1, Y+\r
+ ldd tmp2, Y+3 /* load from c */\r
+ eor tmp1, tmp2\r
+ ldd tmp2, Y+7 /* load from d */\r
+ eor tmp1, tmp2\r
+ \r
+ /**\r
+ precall\r
+ ldi r24, lo8(parity_str)\r
+ ldi r25, hi8(parity_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+/* \r
+ch_str: .asciz "\r\nCh"\r
+maj_str: .asciz "\r\nMaj"\r
+parity_str: .asciz "\r\nParity"\r
+*/\r
+;########################################################### \r
+\r
+.global sha1_init \r
+;void sha1_init(sha1_ctx_t *state){\r
+; DEBUG_S("\r\nSHA1_INIT");\r
+; state->h[0] = 0x67452301;\r
+; state->h[1] = 0xefcdab89;\r
+; state->h[2] = 0x98badcfe;\r
+; state->h[3] = 0x10325476;\r
+; state->h[4] = 0xc3d2e1f0;\r
+; state->length = 0;\r
+;}\r
+; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram\r
+; modifies: Z(r30,r31), Func1, r22\r
+sha1_init:\r
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1\r
+ ldi r30, lo8((sha1_init_vector))\r
+ ldi r31, hi8((sha1_init_vector))\r
+ ldi r22, 5*4 /* bytes to copy */\r
+sha1_init_vloop: \r
+ lpm r23, Z+ \r
+ st X+, r23\r
+ dec r22\r
+ brne sha1_init_vloop\r
+ ldi r22, 8\r
+ clr r1 /* r1 should already be 0 (avr-gcc convention), but play it safe */\r
+sha1_init_lloop:\r
+ st X+, r1\r
+ dec r22\r
+ brne sha1_init_lloop\r
+ ret\r
+ \r
+sha1_init_vector:\r
+.int 0x67452301;\r
+.int 0xefcdab89;\r
+.int 0x98badcfe;\r
+.int 0x10325476;\r
+.int 0xc3d2e1f0;\r
+/*\r
+;########################################################### \r
+\r
+.global rotl32\r
+; === ROTL32 ===\r
+; function that rotates a 32 bit word to the left\r
+; param1: the 32-bit word to rotate\r
+; given in r25,r24,r23,r22 (r25 is most significant)\r
+; param2: an 8-bit value telling how often to rotate\r
+; given in r20\r
+; modifys: r21, r22\r
+rotl32:\r
+ cpi r20, 8\r
+ brlo bitrotl\r
+ mov r21, r25\r
+ mov r25, r24\r
+ mov r24, r23\r
+ mov r23, r22\r
+ mov r22, r21\r
+ subi r20, 8\r
+ rjmp rotr32\r
+bitrotl:\r
+ clr r21\r
+ clc\r
+bitrotl_loop: \r
+ tst r20\r
+ breq fixrotl\r
+ rol r22\r
+ rol r23\r
+ rol r24\r
+ rol r25\r
+ rol r21\r
+ dec r20\r
+ rjmp bitrotl_loop\r
+fixrotl:\r
+ or r22, r21\r
+ ret\r
+ \r
+\r
+;########################################################### \r
+\r
+.global rotr32\r
+; === ROTR32 ===\r
+; function that rotates a 32 bit word to the right\r
+; param1: the 32-bit word to rotate\r
+; given in r25,r24,r23,22 (r25 is most significant)\r
+; param2: an 8-bit value telling how often to rotate\r
+; given in r20\r
+; modifys: r21, r22\r
+rotr32:\r
+ cpi r20, 8\r
+ brlo bitrotr\r
+ mov r21, r22\r
+ mov r22, r23\r
+ mov r23, r24\r
+ mov r24, r25\r
+ mov r25, r21\r
+ subi r20, 8\r
+ rjmp rotr32\r
+bitrotr:\r
+ clr r21\r
+ clc\r
+bitrotr_loop: \r
+ tst r20\r
+ breq fixrotr\r
+ ror r25\r
+ ror r24\r
+ ror r23\r
+ ror r22\r
+ ror r21\r
+ dec r20\r
+ rjmp bitrotr_loop\r
+fixrotr:\r
+ or r25, r21\r
+ ret\r
+ \r
+ \r
+;########################################################### \r
+ \r
+.global change_endian32\r
+; === change_endian32 ===\r
+; function that changes the endianess of a 32-bit word\r
+; param1: the 32-bit word\r
+; given in r25,r24,r23,22 (r25 is most significant)\r
+; modifys: r21, r22\r
+change_endian32:\r
+ movw r20, r22 ; (r22,r23) --> (r20,r21)\r
+ mov r22, r25\r
+ mov r23, r24\r
+ mov r24, r21\r
+ mov r25, r20 \r
+ ret\r
+*/\r
--- /dev/null
+/**
+ * \file sha1.c
+ * \author Daniel Otte
+ * \date 08.10.2006
+ * \par License:
+ * GPL
+ * \brief SHA-1 implementation.
+ *
+ */
+
+#include <string.h> /* memcpy & co */
+#include <stdint.h>
+#include "config.h"
+#undef DEBUG
+#include "debug.h"
+#include "sha1.h"
+
+#define LITTLE_ENDIAN
+
+/********************************************************************************************************/
+
+/**
+ * \brief initialises given SHA-1 context
+ *
+ */
+void sha1_init(sha1_ctx_t *state){
+ DEBUG_S("\r\nSHA1_INIT");
+ state->h[0] = 0x67452301;
+ state->h[1] = 0xefcdab89;
+ state->h[2] = 0x98badcfe;
+ state->h[3] = 0x10325476;
+ state->h[4] = 0xc3d2e1f0;
+ state->length = 0;
+}
+
+/********************************************************************************************************/
+/* some helping functions */
+uint32_t rotl32(uint32_t n, uint8_t bits){
+ return ((n<<bits) | (n>>(32-bits)));
+}
+
+uint32_t change_endian32(uint32_t x){
+ return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
+}
+
+
+/* three SHA-1 inner functions */
+uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nCH");
+ return ((x&y)^((~x)&z));
+}
+
+uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nMAJ");
+ return ((x&y)^(x&z)^(y&z));
+}
+
+uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nPARITY");
+ return ((x^y)^z);
+}
+
+/********************************************************************************************************/
+/**
+ * \brief "add" a block to the hash
+ * This is the core function of the hash algorithm. To understand how it works
+ * and what those variables do, take a look at FIPS 180-2. This is the "alternate
+ * method" implementation, which keeps the message schedule in a 16-word
+ * circular buffer.
+ */
+
+#define MASK 0x0000000f
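+/* MASK reduces the round counter t to an index into the 16-word circular
+ * message-schedule buffer: s = t mod 16 */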
+
+typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);
+
+void sha1_nextBlock (sha1_ctx_t *state, void* block){
+ uint32_t a[5];
+ uint32_t w[16];
+ uint32_t temp;
+ uint8_t t,s;
+ pf_t f[] = {ch,parity,maj,parity};
+ uint32_t k[4]={ 0x5a827999,
+ 0x6ed9eba1,
+ 0x8f1bbcdc,
+ 0xca62c1d6};
+
+ /* load the w array (fixing the byte order on the way) */
+ for(t=0; t<16; ++t){
+ w[t] = change_endian32(((uint32_t*)block)[t]);
+ }
+
+ uint8_t dbgi;
+ for(dbgi=0; dbgi<16; ++dbgi){
+ DEBUG_S("\n\rBlock:");
+ DEBUG_B(dbgi);
+ DEBUG_C(':');
+ #ifdef DEBUG
+ uart_hexdump(&(w[dbgi]) ,4);
+ #endif
+ }
+
+
+ /* load the state */
+ memcpy(a, state->h, 5*sizeof(uint32_t));
+
+
+ /* the fun stuff */
+ for(t=0; t<=79; ++t){
+ s = t & MASK;
+ if(t>=16){
+ #ifdef DEBUG
+ DEBUG_S("\r\n ws = "); uart_hexdump(&ws, 4);
+ #endif
+ w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^
+ w[(s+ 2)&MASK] ^ w[s] ,1);
+ #ifdef DEBUG
+ DEBUG_S(" --> ws = "); uart_hexdump(&(w[s]), 4);
+ #endif
+ }
+
+ uint32_t dtemp;
+ temp = rotl32(a[0],5) + (dtemp=f[t/20](a[1],a[2],a[3])) + a[4] + k[t/20] + w[s];
+ memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
+ a[0] = temp;
+ a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
+
+ /* debug dump */
+ DEBUG_S("\r\nt = "); DEBUG_B(t);
+ DEBUG_S("; a[]: ");
+ #ifdef DEBUG
+ uart_hexdump(a, 5*4);
+ #endif
+ DEBUG_S("; k = ");
+ #ifdef DEBUG
+ uart_hexdump(&(k[t/20]), 4);
+ #endif
+ DEBUG_S("; f(b,c,d) = ");
+ #ifdef DEBUG
+ uart_hexdump(&dtemp, 4);
+ #endif
+ }
+
+ /* update the state */
+ for(t=0; t<5; ++t){
+ state->h[t] += a[t];
+ }
+ state->length += 512;
+}
+
+/********************************************************************************************************/
+
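+/**
+ * \brief pad the final block and hash it
+ * length is the number of bits in this last block; the block must not be
+ * larger than 64 bytes (the sha1() driver below always passes fewer than
+ * 512 bits here)
+ */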
+void sha1_lastBlock(sha1_ctx_t *state, void* block, uint16_t length){
+ uint8_t lb[SHA1_BLOCK_BITS/8]; /* local block */
+ state->length += length;
+ memcpy (&(lb[0]), block, length/8);
+
+ /* set the final one bit */
+ if (length & 0x7){ /* if we have single bits at the end */
+ lb[length/8] = ((uint8_t*)(block))[length/8];
+ } else {
+ lb[length/8] = 0;
+ }
+ lb[length/8] |= 0x80>>(length & 0x7);
+ length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/
+ /* pad with zeros */
+ if (length>64-8){ /* not enough space for the 64-bit length value */
+ memset((void*)(&(lb[length])), 0, 64-length);
+ sha1_nextBlock(state, lb);
+ state->length -= 512;
+ length = 0;
+ }
+ memset((void*)(&(lb[length])), 0, 56-length);
+ /* store the 64bit length value */
+#if defined LITTLE_ENDIAN
+ /* this is now rolled up */
+ uint8_t i;
+ for (i=1; i<=8; ++i){
+ lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
+ }
+#elif defined BIG_ENDIAN
+ *((uint64_t)&(lb[56])) = state->length;
+#endif
+ sha1_nextBlock(state, lb);
+}
+
+/********************************************************************************************************/
+
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state){
+#if defined LITTLE_ENDIAN
+ uint8_t i;
+ for(i=0; i<8; ++i){
+ ((uint32_t*)dest)[i] = change_endian32(state->h[i]);
+ }
+#elif defined BIG_ENDIAN
+ if (dest != state->h)
+ memcpy(dest, state->h, SHA1_HASH_BITS/8);
+#else
+# error unsupported endian type!
+#endif
+}
+
+/********************************************************************************************************/
+/**
+ * \brief calculate the SHA-1 hash of a message located entirely in RAM
+ * length is the message length in bits
+ */
+void sha1 (sha1_hash_t *dest, void* msg, uint32_t length){
+ sha1_ctx_t s;
+ DEBUG_S("\r\nBLA BLUB");
+ sha1_init(&s);
+ while(length & (~0x0001ff)){ /* length>=512 */
+ DEBUG_S("\r\none block");
+ sha1_nextBlock(&s, msg);
+ msg += SHA1_BLOCK_BITS/8; /* increment pointer to next block */
+ length -= SHA1_BLOCK_BITS;
+ }
+ sha1_lastBlock(&s, msg, length);
+ sha1_ctx2hash(dest, &s);
+}
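+
+/* rough usage sketch (the "abc" test vector from FIPS 180-2; the exact
+ * sha1_hash_t type is declared in sha1.h and holds the 20 digest bytes):
+ *
+ *   uint8_t digest[SHA1_HASH_BITS/8];
+ *   sha1((sha1_hash_t*)digest, "abc", 3*8);
+ *   // expected: a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d
+ */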
+
+