--- /dev/null
+/*\r
+ * Author: Daniel Otte\r
+ *\r
+ * License: GPL\r
+*/\r
+; SHA1 implementation in assembler for AVR\r
+SHA1_BLOCK_BITS = 512\r
+SHA1_HASH_BITS = 160\r
+\r
+.macro precall\r
+ /* push r18 - r27, r30 - r31*/\r
+ push r0\r
+ push r1\r
+ push r18\r
+ push r19\r
+ push r20\r
+ push r21\r
+ push r22\r
+ push r23\r
+ push r24\r
+ push r25\r
+ push r26\r
+ push r27\r
+ push r30\r
+ push r31\r
+ clr r1\r
+.endm\r
+\r
+.macro postcall\r
+ pop r31\r
+ pop r30\r
+ pop r27\r
+ pop r26\r
+ pop r25\r
+ pop r24\r
+ pop r23\r
+ pop r22\r
+ pop r21\r
+ pop r20\r
+ pop r19\r
+ pop r18\r
+ pop r1\r
+ pop r0\r
+.endm\r
+\r
+\r
+.macro hexdump length\r
+ push r27\r
+ push r26\r
+ ldi r25, '\r'\r
+ mov r24, r25\r
+ call uart_putc\r
+ ldi r25, '\n'\r
+ mov r24, r25\r
+ call uart_putc\r
+ pop r26\r
+ pop r27\r
+ movw r24, r26\r
+.if \length > 16\r
+ ldi r22, lo8(16)\r
+ ldi r23, hi8(16)\r
+ push r27\r
+ push r26\r
+ call uart_hexdump\r
+ pop r26\r
+ pop r27\r
+ adiw r26, 16\r
+ hexdump \length-16\r
+.else\r
+ ldi r22, lo8(\length)\r
+ ldi r23, hi8(\length)\r
+ call uart_hexdump\r
+.endif\r
+.endm\r
+\r
+.macro delay\r
+/* \r
+ push r0\r
+ push r1\r
+ clr r0\r
+1: clr r1\r
+2: dec r1\r
+ brne 2b\r
+ dec r0\r
+ brne 1b\r
+ pop r1\r
+ pop r0 // */\r
+.endm\r
+\r
+/* X points to Block */\r
+.macro dbg_hexdump length\r
+/* \r
+ precall\r
+ hexdump \length\r
+ postcall\r
+ // */\r
+.endm\r
+\r
+\r
+\r
+.section .text\r
+\r
+SPL = 0x3D\r
+SPH = 0x3E\r
+SREG = 0x3F\r
+\r
+\r
+;\r
+;sha1_ctx_t is:\r
+;\r
+; [h0][h1][h2][h3][h4][length]\r
+; each hn is a 32-bit word, length is a 64-bit counter of the message bits processed so far\r
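+;\r
+; for reference, a rough C sketch of the same layout (the authoritative\r
+; typedef lives in sha1.h and is what sha1.c below uses):\r
+;   typedef struct { uint32_t h[5]; uint64_t length; } sha1_ctx_t; /* 28 bytes */\r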
+\r
+;########################################################### \r
+\r
+.global sha1_ctx2hash\r
+; === sha1_ctx2hash ===\r
+; this function converts the internal state into the final hash value (a byte string)\r
+; param1: the 16-bit destination pointer\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: the 16-bit pointer to sha1_ctx structure\r
+; given in r23,r22\r
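+; note: the five state words are stored little-endian in RAM; the nested\r
+; loops below emit each word byte-reversed, so the resulting hash bytes are\r
+; in the usual big-endian SHA-1 order\r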
+sha1_ctx2hash:\r
+ movw r26, r22\r
+ movw r30, r24\r
+ ldi r21, 5\r
+ sbiw r26, 4\r
+1: \r
+ ldi r20, 4\r
+ adiw r26, 8\r
+2: \r
+ ld r0, -X\r
+ st Z+, r0 \r
+ dec r20\r
+ brne 2b\r
+ \r
+ dec r21\r
+ brne 1b\r
+ \r
+ ret\r
+\r
+;########################################################### \r
+\r
+.global sha1\r
+; === sha1 ===\r
+; this function calculates SHA-1 hashes from messages in RAM\r
+; param1: the 16-bit hash destination pointer\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: the 16-bit pointer to message\r
+; given in r23,r22\r
+; param3: 32-bit length value (length of message in bits)\r
+; given in r21,r20,r19,r18\r
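+;\r
+; rough usage sketch from C (the exact types are declared in sha1.h; the\r
+; destination must provide SHA1_HASH_BITS/8 = 20 bytes, and the length is\r
+; passed in bits):\r
+;   uint8_t digest[20];\r
+;   sha1((sha1_hash_t*)digest, "abc", 3*8);\r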
+sha1:\r
+sha1_prolog:\r
+ push r8\r
+ push r9\r
+ push r10\r
+ push r11\r
+ push r12\r
+ push r13\r
+ push r16\r
+ push r17\r
+ in r16, SPL\r
+ in r17, SPH\r
+ subi r16, 5*4+8 \r
+ sbci r17, 0 \r
+ in r0, SREG\r
+ cli\r
+ out SPL, r16\r
+ out SPH, r17\r
+ out SREG, r0\r
+ \r
+ push r25\r
+ push r24\r
+ subi r16, lo8(-1) /* ctx pointer = SP+1; subi/sbci with -1 adds one with proper carry (inc would not set C) */\r
+ sbci r17, hi8(-1)\r
+ \r
+ movw r8, r18 /* backup of length*/\r
+ movw r10, r20\r
+ \r
+ movw r12, r22 /* backup of msg-ptr */\r
+ \r
+ movw r24, r16\r
+ rcall sha1_init\r
+ /* if length >= 512 */\r
+1:\r
+ tst r11\r
+ brne 4f\r
+ tst r10\r
+ brne 4f\r
+ mov r19, r9\r
+ cpi r19, 0x02\r
+ brlo 4f\r
+ \r
+ movw r24, r16\r
+ movw r22, r12\r
+ rcall sha1_nextBlock\r
+ ldi r19, 64 /* advance the saved message pointer by 512/8 = 64 bytes */\r
+ add r12, r19\r
+ adc r13, r1\r
+ /* length -= 512 */\r
+ ldi r19, 0x02\r
+ sub r9, r19\r
+ sbc r10, r1\r
+ sbc r11, r1\r
+ rjmp 1b\r
+ \r
+4:\r
+ movw r24, r16\r
+ movw r22, r12\r
+ movw r20, r8\r
+ rcall sha1_lastBlock\r
+ \r
+ pop r24\r
+ pop r25\r
+ movw r22, r16\r
+ rcall sha1_ctx2hash \r
+ \r
+sha1_epilog:\r
+ in r30, SPL\r
+ in r31, SPH\r
+ adiw r30, 5*4+8 \r
+ in r0, SREG\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG, r0\r
+ pop r17\r
+ pop r16\r
+ pop r13\r
+ pop r12\r
+ pop r11\r
+ pop r10\r
+ pop r9\r
+ pop r8\r
+ ret\r
+\r
+;########################################################### \r
+\r
+\r
+; block MUST NOT be larger than 64 bytes\r
+\r
+.global sha1_lastBlock\r
+; === sha1_lastBlock ===\r
+; this function handles padding and the final block(s) of a SHA-1 computation\r
+; param1: the 16-bit pointer to sha1_ctx structure\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: a 16-bit pointer to the 64-byte block to hash\r
+; given in r23,r22\r
+; param3: a 16-bit integer specifying the length of the block in bits\r
+; given in r21,r20\r
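+;\r
+; padding (as in FIPS 180-2): a single 1 bit is appended to the message,\r
+; then zero bits until the length is congruent to 448 mod 512, then the\r
+; total message length as a 64-bit big-endian value; if the length field\r
+; does not fit into this block, an additional block is hashed\r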
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)\r
+\r
+\r
+sha1_lastBlock:\r
+ tst r20\r
+ brne sha1_lastBlock_prolog\r
+ cpi r21, 0x02\r
+ brne sha1_lastBlock_prolog\r
+ push r25\r
+ push r24\r
+ push r23\r
+ push r22\r
+ rcall sha1_nextBlock\r
+ pop r22\r
+ pop r23\r
+ pop r24\r
+ pop r25\r
+ clr r21\r
+ clr r22\r
+sha1_lastBlock_prolog:\r
+ /* allocate space on stack */\r
+ in r30, SPL\r
+ in r31, SPH\r
+ in r1, SREG\r
+ subi r30, lo8(64)\r
+ sbci r31, hi8(64) /* hi8(64) == 0, so this just propagates the borrow */\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG,r1\r
+\r
+ adiw r30, 1 /* SP points to next free byte on stack */\r
+ mov r18, r20 /* r20 = LSB(length) */\r
+ lsr r18\r
+ lsr r18\r
+ lsr r18\r
+ bst r21, 0 /* bit 8 of the bit count (value 256) ... */\r
+ bld r18, 5 /* ... becomes bit 5 (value 32) of the byte count; now r18 == length/8 (aka. length in bytes) */\r
+ \r
+ \r
+ movw r26, r22 /* X points to begin of msg */\r
+ tst r18\r
+ breq sha1_lastBlock_post_copy\r
+ mov r1, r18\r
+sha1_lastBlock_copy_loop:\r
+ ld r0, X+\r
+ st Z+, r0\r
+ dec r1\r
+ brne sha1_lastBlock_copy_loop\r
+sha1_lastBlock_post_copy: \r
+sha1_lastBlock_insert_stuffing_bit: \r
+ ldi r19, 0x80\r
+ mov r0,r19 \r
+ ldi r19, 0x07\r
+ and r19, r20 /* if we are in bitmode */\r
+ breq 2f /* no bitmode */\r
+1: \r
+ lsr r0\r
+ dec r19\r
+ brne 1b\r
+ ld r19, X\r
+/* note: the bits below the stop bit are message garbage; strictly they should be masked off before the OR */\r
+ or r0, r19\r
+2: \r
+ st Z+, r0\r
+ inc r18\r
+\r
+/* check whether the 8-byte length field still fits into this block */\r
+ cpi r18, 64-8+1\r
+ brsh 0f \r
+ rjmp sha1_lastBlock_insert_zeros\r
+0:\r
+ /* it does not fit: zero-fill the rest of this block, hash it, and put\r
+ the length into an additional block */\r
+ ldi r19, 64\r
+ sub r19, r18\r
+ breq 2f\r
+1: \r
+ st Z+, r1\r
+ dec r19\r
+ brne 1b \r
+2: \r
+ sbiw r30, 63\r
+ sbiw r30, 1\r
+ movw r22, r30\r
+ \r
+ push r31\r
+ push r30\r
+ push r25\r
+ push r24\r
+ push r21\r
+ push r20\r
+ rcall sha1_nextBlock\r
+ pop r20\r
+ pop r21\r
+ pop r24\r
+ pop r25\r
+ pop r30\r
+ pop r31\r
+ \r
+ /* compensate: the extra sha1_nextBlock call above added 512 to state.length */\r
+ movw r26, r24\r
+ adiw r26, 4*5+1 /* we can skip the lowest byte */\r
+ ld r19, X\r
+ subi r19, hi8(512)\r
+ st X+, r19\r
+ ldi r18, 6\r
+1:\r
+ ld r19, X\r
+ sbci r19, 0\r
+ st X+, r19\r
+ dec r18\r
+ brne 1b\r
+ \r
+; clr r18 /* not necessary: r18 ended up as 0 in the loop above ;-) */\r
+ /* Z already points to the begin of the block again (restored by the pops above) */\r
+\r
+sha1_lastBlock_insert_zeros: \r
+ ldi r19, 64-8\r
+ sub r19, r18\r
+ breq sha1_lastBlock_insert_length\r
+ clr r1\r
+1:\r
+ st Z+, r1 /* r1 is still zero */\r
+ dec r19\r
+ brne 1b\r
+\r
+; rjmp sha1_lastBlock_epilog\r
+sha1_lastBlock_insert_length:\r
+ movw r26, r24 /* X points to state */\r
+ adiw r26, 5*4 /* X points to (state.length) */\r
+ adiw r30, 8 /* Z points one after the last byte of block */\r
+ ld r0, X+\r
+ add r0, r20\r
+ st -Z, r0\r
+ ld r0, X+\r
+ adc r0, r21\r
+ st -Z, r0\r
+ ldi r19, 6\r
+1:\r
+ ld r0, X+\r
+ adc r0, r1\r
+ st -Z, r0\r
+ dec r19\r
+ brne 1b\r
+\r
+ sbiw r30, 64-8\r
+ movw r22, r30\r
+ rcall sha1_nextBlock\r
+\r
+sha1_lastBlock_epilog:\r
+ in r30, SPL\r
+ in r31, SPH\r
+ in r1, SREG\r
+ adiw r30, 63 ; lo8(64)\r
+ adiw r30, 1 ; hi8(64)\r
+ cli\r
+ out SPL, r30\r
+ out SPH, r31\r
+ out SREG,r1\r
+ clr r1\r
+ clr r0\r
+ ret\r
+\r
+/**/\r
+;########################################################### \r
+\r
+.global sha1_nextBlock\r
+; === sha1_nextBlock ===\r
+; this is the core function for calculating SHA-1 hashes\r
+; param1: the 16-bit pointer to sha1_ctx structure\r
+; given in r25,r24 (r25 is most significant)\r
+; param2: a 16-bit pointer to the 64-byte block to hash\r
+; given in r23,r22\r
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit words for the w[] array and 5 for the a[] working variables (the define reserves one extra word, 88 bytes total)\r
+\r
+xtmp = 0\r
+xNULL = 1\r
+W1 = 10\r
+W2 = 11\r
+T1 = 12\r
+T2 = 13\r
+T3 = 14\r
+T4 = 15\r
+LoopC = 16\r
+S = 17\r
+tmp1 = 18\r
+tmp2 = 19\r
+tmp3 = 20\r
+tmp4 = 21\r
+F1 = 22\r
+F2 = 23\r
+F3 = 24\r
+F4 = 25\r
+\r
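+/* message schedule: only a 16-word circular buffer is kept; for t >= 16 the\r
+ code below computes, in place,\r
+ w[s] = ROTL1( w[(s+2) mod 16] ^ w[(s+8) mod 16] ^ w[(s+13) mod 16] ^ w[s] )\r
+ with s = t mod 16 (the "alternate method" of FIPS 180-2, same as sha1.c) */\r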
+/* byteorder: high number <--> high significance */\r
+sha1_nextBlock:\r
+ ; prolog: make room for the local variables\r
+ /* replace push & pop by mem ops? */\r
+ push r10\r
+ push r11\r
+ push r12\r
+ push r13\r
+ push r14\r
+ push r15\r
+ push r16\r
+ push r17\r
+ push r28\r
+ push r29\r
+ in r20, SPL\r
+ in r21, SPH\r
+ movw r18, r20 ;backup SP\r
+; movw r26, r20 ; X points to free space on stack /* maybe removeable? */ \r
+ movw r30, r22 ; Z points to message\r
+ subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63\r
+ sbci r21, hi8(sha1_nextBlock_localSpace)\r
+ movw r26, r20 ; X points to free space on stack \r
+ in r0, SREG\r
+ cli ; we want to be uninterrupted while updating SP\r
+ out SPL, r20\r
+ out SPH, r21\r
+ out SREG, r0\r
+ \r
+ push r18\r
+ push r19 /* push old SP on new stack */\r
+ push r24\r
+ push r25 /* param1 will be needed later */\r
+ \r
+ /* load a[] with state */\r
+ movw r28, r24 /* load pointer to state in Y */\r
+ adiw r26, 1 ; X++\r
+\r
+ ldi LoopC, 5*4 \r
+1: ld tmp1, Y+\r
+ st X+, tmp1\r
+ dec LoopC\r
+ brne 1b\r
+\r
+ movw W1, r26 /* save pointer to w[0] */\r
+ /* load w[] with endian fixed message */\r
+ /* we might also use the changeendian32() function at bottom */\r
+ movw r30, r22 /* move param2 (pointer to msg) to Z */\r
+ ldi LoopC, 16\r
+1:\r
+ ldd tmp1, Z+3\r
+ st X+, tmp1\r
+ ldd tmp1, Z+2\r
+ st X+, tmp1\r
+ ldd tmp1, Z+1\r
+ st X+, tmp1\r
+ ld tmp1, Z\r
+ st X+, tmp1\r
+ adiw r30, 4\r
+ dec LoopC\r
+ brne 1b\r
+ \r
+ ;clr LoopC /* LoopC is already 0 after the copy loop above; it is the round counter t of FIPS 180-2 */\r
+ clr xtmp\r
+sha1_nextBlock_mainloop:\r
+ mov S, LoopC\r
+ lsl S\r
+ lsl S\r
+ andi S, 0x3C /* S = (t mod 16)*4, a byte offset into w[] */\r
+ /* load w[s] */\r
+ movw r26, W1\r
+ add r26, S /* X points at w[s] */\r
+ adc r27, xNULL\r
+ ld T1, X+\r
+ ld T2, X+\r
+ ld T3, X+\r
+ ld T4, X+\r
+\r
+ /**/\r
+ push r26\r
+ push r27\r
+ push T4\r
+ push T3\r
+ push T2\r
+ push T1\r
+ in r26, SPL\r
+ in r27, SPH\r
+ adiw r26, 1\r
+ dbg_hexdump 4\r
+ pop T1\r
+ pop T2\r
+ pop T3\r
+ pop T4\r
+ pop r27\r
+ pop r26\r
+ /**/\r
+\r
+ cpi LoopC, 16\r
+ brlt sha1_nextBlock_mainloop_core\r
+ /* update w[s] */\r
+ ldi tmp1, 2*4\r
+ rcall 1f\r
+ ldi tmp1, 8*4\r
+ rcall 1f\r
+ ldi tmp1, 13*4\r
+ rcall 1f\r
+ rjmp 2f\r
+1: /* this might be "outsourced" to save the jump above */\r
+ add tmp1, S\r
+ andi tmp1, 0x3f\r
+ movw r26, W1\r
+ add r26, tmp1\r
+ adc r27, xNULL\r
+ ld tmp2, X+\r
+ eor T1, tmp2\r
+ ld tmp2, X+\r
+ eor T2, tmp2\r
+ ld tmp2, X+\r
+ eor T3, tmp2\r
+ ld tmp2, X+\r
+ eor T4, tmp2\r
+ ret\r
+2: /* now we just have to do a ROTL1(T) and store T back */\r
+ mov tmp2, T4\r
+ rol tmp2\r
+ rol T1\r
+ rol T2\r
+ rol T3\r
+ rol T4\r
+ movw r26, W1\r
+ add r26, S\r
+ adc r27, xNULL\r
+ st X+, T1\r
+ st X+, T2\r
+ st X+, T3\r
+ st X+, T4\r
+ \r
+sha1_nextBlock_mainloop_core: /* the core round: T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */\r
+ /* T already contains w[s] */\r
+ movw r26, W1\r
+ sbiw r26, 4*1 /* X points at a[4] aka e */\r
+ ld tmp1, X+ \r
+ add T1, tmp1\r
+ ld tmp1, X+ \r
+ adc T2, tmp1\r
+ ld tmp1, X+ \r
+ adc T3, tmp1\r
+ ld tmp1, X+ \r
+ adc T4, tmp1 /* T = w[s]+e */\r
+ sbiw r26, 4*5 /* X points at a[0] aka a */\r
+ ld F1, X+ \r
+ ld F2, X+ \r
+ ld F3, X+ \r
+ ld F4, X+ \r
+ mov tmp1, F4 /* X points at a[1] aka b */\r
+ ldi tmp2, 5\r
+1:\r
+ rol tmp1\r
+ rol F1\r
+ rol F2\r
+ rol F3\r
+ rol F4\r
+ dec tmp2\r
+ brne 1b\r
+ \r
+ add T1, F1\r
+ adc T2, F2\r
+ adc T3, F3\r
+ adc T4, F4 /* T = ROTL(a,5) + e + w[s] */\r
+ \r
+ /* select the round-dependent constant k_t and function f_t: xtmp holds the\r
+ current 20-round stage (0..3), xTable the stage boundaries, KTable the\r
+ constants and JumpTable the f_t routines */\r
+ ldi r30, lo8(sha1_nextBlock_xTable)\r
+ ldi r31, hi8(sha1_nextBlock_xTable)\r
+ add r30, xtmp\r
+ adc r31, xNULL\r
+ lpm tmp1, Z\r
+ cp tmp1, LoopC\r
+ brne 1f\r
+ inc xtmp\r
+1: ldi r30, lo8(sha1_nextBlock_KTable)\r
+ ldi r31, hi8(sha1_nextBlock_KTable)\r
+ lsl xtmp\r
+ lsl xtmp\r
+ add r30, xtmp\r
+ adc r31, xNULL\r
+ lsr xtmp\r
+ lsr xtmp\r
+ \r
+ lpm tmp1, Z+\r
+ add T1, tmp1\r
+ lpm tmp1, Z+\r
+ adc T2, tmp1\r
+ lpm tmp1, Z+\r
+ adc T3, tmp1\r
+ lpm tmp1, Z+\r
+ adc T4, tmp1\r
+ /* T = ROTL(a,5) + e + kt + w[s] */\r
+ \r
+ /* at this point Z-4 points at k_t */\r
+ movw r28, r26 /* copy X in Y */\r
+ adiw r30, 3*4 /* now Z points at the right entry of the jump table (byte address) */\r
+ clc /* convert the byte address into a word address for icall */\r
+ ror r31\r
+ ror r30\r
+ \r
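+ /* f_t is evaluated bytewise: each icall below runs the selected routine on\r
+ one byte of b, c and d (the routines advance Y) and returns its result\r
+ in tmp1 */\r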
+ icall\r
+ mov F1, tmp1\r
+ icall\r
+ mov F2, tmp1\r
+ icall\r
+ mov F3, tmp1\r
+ icall\r
+ \r
+ add T1, F1\r
+ adc T2, F2\r
+ adc T3, F3\r
+ adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */\r
+ /* X points still at a[1] aka b, Y points at a[2] aka c */ \r
+ /* update a[] */\r
+sha1_nextBlock_update_a:\r
+ /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/\r
+ //adiw r28, 3*4 /* Y should point at a[4] aka e */\r
+ movw r28, W1\r
+ sbiw r28, 4\r
+ \r
+ ldi tmp2, 4*4 \r
+1: \r
+ ld tmp1, -Y\r
+ std Y+4, tmp1\r
+ dec tmp2\r
+ brne 1b\r
+ /* Y points at a[0] aka a*/\r
+ \r
+ movw r28, W1\r
+ sbiw r28, 5*4\r
+ /* store T in a[0] aka a */\r
+ st Y+, T1\r
+ st Y+, T2\r
+ st Y+, T3\r
+ st Y+, T4\r
+ /* Y points at a[1] aka b*/\r
+ \r
+ /* rotate c: c = ROTL30(b), implemented as two ROTR1 steps */\r
+ ldd T1, Y+1*4\r
+ ldd T2, Y+1*4+1\r
+ ldd T3, Y+1*4+2\r
+ ldd T4, Y+1*4+3\r
+ mov tmp1, T1\r
+ ldi tmp2, 2\r
+1: ror tmp1\r
+ ror T4\r
+ ror T3\r
+ ror T2\r
+ ror T1\r
+ dec tmp2\r
+ brne 1b\r
+ std Y+1*4+0, T1\r
+ std Y+1*4+1, T2\r
+ std Y+1*4+2, T3\r
+ std Y+1*4+3, T4\r
+ \r
+ push r27\r
+ push r26\r
+ movw r26, W1\r
+ sbiw r26, 4*5\r
+ dbg_hexdump 4*5\r
+ pop r26\r
+ pop r27\r
+ \r
+ inc LoopC\r
+ cpi LoopC, 80\r
+ brge 1f\r
+ jmp sha1_nextBlock_mainloop\r
+/**************************************/\r
+1: \r
+ /* small fix-up: Y was left at a[1], step back to a[0] */\r
+ sbiw r28, 4\r
+\r
+/* add a[] to state and inc length */ \r
+ pop r27\r
+ pop r26 /* now X points to state (and Y still at a[0]) */\r
+ ldi tmp4, 5\r
+1: clc\r
+ ldi tmp3, 4\r
+2: ld tmp1, X\r
+ ld tmp2, Y+\r
+ adc tmp1, tmp2\r
+ st X+, tmp1\r
+ dec tmp3\r
+ brne 2b\r
+ dec tmp4\r
+ brne 1b\r
+ \r
+ /* now length += 512 */\r
+ adiw r26, 1 /* we skip the least significant byte */\r
+ ld tmp1, X\r
+ ldi tmp2, hi8(512) /* 2 */\r
+ add tmp1, tmp2\r
+ st X+, tmp1\r
+ ldi tmp2, 6\r
+1:\r
+ ld tmp1, X\r
+ adc tmp1, xNULL\r
+ st X+, tmp1\r
+ dec tmp2\r
+ brne 1b\r
+ \r
+; EPILOG\r
+sha1_nextBlock_epilog:\r
+/* now we should clean up the stack */\r
+ pop r21\r
+ pop r20\r
+ in r0, SREG\r
+ cli ; we want to be uninterrupted while updating SP\r
+ out SPL, r20\r
+ out SPH, r21\r
+ out SREG, r0\r
+ \r
+ clr r1\r
+ pop r29\r
+ pop r28\r
+ pop r17\r
+ pop r16\r
+ pop r15\r
+ pop r14\r
+ pop r13\r
+ pop r12\r
+ pop r11\r
+ pop r10\r
+ ret\r
+\r
+sha1_nextBlock_xTable:\r
+.byte 20,40,60,0\r
+sha1_nextBlock_KTable:\r
+.int 0x5a827999 \r
+.int 0x6ed9eba1 \r
+.int 0x8f1bbcdc \r
+.int 0xca62c1d6\r
+sha1_nextBlock_JumpTable:\r
+jmp sha1_nextBlock_Ch \r
+jmp sha1_nextBlock_Parity\r
+jmp sha1_nextBlock_Maj\r
+jmp sha1_nextBlock_Parity\r
+\r
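+/* the three routines below are bytewise versions of Ch, Maj and Parity\r
+ (cf. ch/maj/parity in sha1.c); since they are purely bitwise they can be\r
+ applied to one byte of b, c and d at a time, four calls per round */\r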
+ /* X and Y still point at a[1] aka b ; return value in tmp1 */\r
+sha1_nextBlock_Ch:\r
+ ld tmp1, Y+\r
+ mov tmp2, tmp1\r
+ com tmp2\r
+ ldd tmp3, Y+3 /* load from c */\r
+ and tmp1, tmp3\r
+ ldd tmp3, Y+7 /* load from d */\r
+ and tmp2, tmp3\r
+ eor tmp1, tmp2\r
+ /**\r
+ precall\r
+ ldi r24, lo8(ch_str)\r
+ ldi r25, hi8(ch_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+ \r
+sha1_nextBlock_Maj:\r
+ ld tmp1, Y+\r
+ mov tmp2, tmp1\r
+ ldd tmp3, Y+3 /* load from c */\r
+ and tmp1, tmp3\r
+ ldd tmp4, Y+7 /* load from d */\r
+ and tmp2, tmp4\r
+ eor tmp1, tmp2\r
+ and tmp3, tmp4\r
+ eor tmp1, tmp3\r
+ /**\r
+ precall\r
+ ldi r24, lo8(maj_str)\r
+ ldi r25, hi8(maj_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+\r
+sha1_nextBlock_Parity:\r
+ ld tmp1, Y+\r
+ ldd tmp2, Y+3 /* load from c */\r
+ eor tmp1, tmp2\r
+ ldd tmp2, Y+7 /* load from d */\r
+ eor tmp1, tmp2\r
+ \r
+ /**\r
+ precall\r
+ ldi r24, lo8(parity_str)\r
+ ldi r25, hi8(parity_str)\r
+ call uart_putstr_P\r
+ postcall\r
+ /**/\r
+ ret\r
+/* \r
+ch_str: .asciz "\r\nCh"\r
+maj_str: .asciz "\r\nMaj"\r
+parity_str: .asciz "\r\nParity"\r
+*/\r
+;########################################################### \r
+\r
+.global sha1_init \r
+;void sha1_init(sha1_ctx_t *state){\r
+; DEBUG_S("\r\nSHA1_INIT");\r
+; state->h[0] = 0x67452301;\r
+; state->h[1] = 0xefcdab89;\r
+; state->h[2] = 0x98badcfe;\r
+; state->h[3] = 0x10325476;\r
+; state->h[4] = 0xc3d2e1f0;\r
+; state->length = 0;\r
+;}\r
+; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram\r
+; modifies: Z(r30,r31), Func1, r22\r
+sha1_init:\r
+ movw r26, r24 ; (24,25) --> (26,27) load X with param1\r
+ ldi r30, lo8((sha1_init_vector))\r
+ ldi r31, hi8((sha1_init_vector))\r
+ ldi r22, 5*4 /* bytes to copy */\r
+sha1_init_vloop: \r
+ lpm r23, Z+ \r
+ st X+, r23\r
+ dec r22\r
+ brne sha1_init_vloop\r
+ ldi r22, 8\r
+ clr r1 /* r1 should already be 0 (avr-gcc convention), but play it safe */\r
+sha1_init_lloop:\r
+ st X+, r1\r
+ dec r22\r
+ brne sha1_init_lloop\r
+ ret\r
+ \r
+sha1_init_vector:\r
+.int 0x67452301;\r
+.int 0xefcdab89;\r
+.int 0x98badcfe;\r
+.int 0x10325476;\r
+.int 0xc3d2e1f0;\r
+/*\r
+;########################################################### \r
+\r
+.global rotl32\r
+; === ROTL32 ===\r
+; function that rotates a 32 bit word to the left\r
+; param1: the 32-bit word to rotate\r
+; given in r25,r24,r23,r22 (r25 is most significant)\r
+; param2: an 8-bit value telling how often to rotate\r
+; given in r20\r
+; modifys: r21, r22\r
+rotl32:\r
+ cpi r20, 8\r
+ brlo bitrotl\r
+ mov r21, r25\r
+ mov r25, r24\r
+ mov r24, r23\r
+ mov r23, r22\r
+ mov r22, r21\r
+ subi r20, 8\r
+ rjmp rotr32\r
+bitrotl:\r
+ clr r21\r
+ clc\r
+bitrotl_loop: \r
+ tst r20\r
+ breq fixrotl\r
+ rol r22\r
+ rol r23\r
+ rol r24\r
+ rol r25\r
+ rol r21\r
+ dec r20\r
+ rjmp bitrotl_loop\r
+fixrotl:\r
+ or r22, r21\r
+ ret\r
+ \r
+\r
+;########################################################### \r
+\r
+.global rotr32\r
+; === ROTR32 ===\r
+; function that rotates a 32 bit word to the right\r
+; param1: the 32-bit word to rotate\r
+; given in r25,r24,r23,22 (r25 is most significant)\r
+; param2: an 8-bit value telling how often to rotate\r
+; given in r20\r
+; modifys: r21, r22\r
+rotr32:\r
+ cpi r20, 8\r
+ brlo bitrotr\r
+ mov r21, r22\r
+ mov r22, r23\r
+ mov r23, r24\r
+ mov r24, r25\r
+ mov r25, r21\r
+ subi r20, 8\r
+ rjmp rotr32\r
+bitrotr:\r
+ clr r21\r
+ clc\r
+bitrotr_loop: \r
+ tst r20\r
+ breq fixrotr\r
+ ror r25\r
+ ror r24\r
+ ror r23\r
+ ror r22\r
+ ror r21\r
+ dec r20\r
+ rjmp bitrotr_loop\r
+fixrotr:\r
+ or r25, r21\r
+ ret\r
+ \r
+ \r
+;########################################################### \r
+ \r
+.global change_endian32\r
+; === change_endian32 ===\r
+; function that changes the endianess of a 32-bit word\r
+; param1: the 32-bit word\r
+; given in r25,r24,r23,22 (r25 is most significant)\r
+; modifys: r21, r22\r
+change_endian32:\r
+ movw r20, r22 ; (r22,r23) --> (r20,r21)\r
+ mov r22, r25\r
+ mov r23, r24\r
+ mov r24, r21\r
+ mov r25, r20 \r
+ ret\r
+*/\r
--- /dev/null
+/**
+ * \file sha1.c
+ * \author Daniel Otte
+ * \date 08.10.2006
+ * \par License:
+ * GPL
+ * \brief SHA-1 implementation.
+ *
+ */
+
+#include <string.h> /* memcpy & co */
+#include <stdint.h>
+#include "config.h"
+#undef DEBUG
+#include "debug.h"
+#include "sha1.h"
+
+#define LITTLE_ENDIAN
+
+/********************************************************************************************************/
+
+/**
+ * \brief initialises given SHA-1 context
+ *
+ */
+void sha1_init(sha1_ctx_t *state){
+ DEBUG_S("\r\nSHA1_INIT");
+ state->h[0] = 0x67452301;
+ state->h[1] = 0xefcdab89;
+ state->h[2] = 0x98badcfe;
+ state->h[3] = 0x10325476;
+ state->h[4] = 0xc3d2e1f0;
+ state->length = 0;
+}
+
+/********************************************************************************************************/
+/* some helping functions */
+uint32_t rotl32(uint32_t n, uint8_t bits){
+ return ((n<<bits) | (n>>(32-bits)));
+}
+
+uint32_t change_endian32(uint32_t x){
+ return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
+}
+
+
+/* three SHA-1 inner functions */
+uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nCH");
+ return ((x&y)^((~x)&z));
+}
+
+uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nMAJ");
+ return ((x&y)^(x&z)^(y&z));
+}
+
+uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
+ DEBUG_S("\r\nPARITY");
+ return ((x^y)^z);
+}
+
+/********************************************************************************************************/
+/**
+ * \brief "add" a block to the hash
+ * This is the core function of the hash algorithm. To understand how it works
+ * and what those variables do, take a look at FIPS 180-2. This is the "alternate
+ * method" implementation, which keeps the message schedule in a 16-word
+ * circular buffer.
+ */
+
+#define MASK 0x0000000f
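+/* MASK reduces the round counter t to an index into the 16-word circular
+ * message-schedule buffer: s = t mod 16 */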
+
+typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);
+
+void sha1_nextBlock (sha1_ctx_t *state, void* block){
+ uint32_t a[5];
+ uint32_t w[16];
+ uint32_t temp;
+ uint8_t t,s;
+ pf_t f[] = {ch,parity,maj,parity};
+ uint32_t k[4]={ 0x5a827999,
+ 0x6ed9eba1,
+ 0x8f1bbcdc,
+ 0xca62c1d6};
+
+ /* load the w array (fixing the byte order on the way) */
+ for(t=0; t<16; ++t){
+ w[t] = change_endian32(((uint32_t*)block)[t]);
+ }
+
+ uint8_t dbgi;
+ for(dbgi=0; dbgi<16; ++dbgi){
+ DEBUG_S("\n\rBlock:");
+ DEBUG_B(dbgi);
+ DEBUG_C(':');
+ #ifdef DEBUG
+ uart_hexdump(&(w[dbgi]) ,4);
+ #endif
+ }
+
+
+ /* load the state */
+ memcpy(a, state->h, 5*sizeof(uint32_t));
+
+
+ /* the fun stuff */
+ for(t=0; t<=79; ++t){
+ s = t & MASK;
+ if(t>=16){
+ #ifdef DEBUG
+ DEBUG_S("\r\n ws = "); uart_hexdump(&ws, 4);
+ #endif
+ w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^
+ w[(s+ 2)&MASK] ^ w[s] ,1);
+ #ifdef DEBUG
+ DEBUG_S(" --> ws = "); uart_hexdump(&(w[s]), 4);
+ #endif
+ }
+
+ uint32_t dtemp;
+ temp = rotl32(a[0],5) + (dtemp=f[t/20](a[1],a[2],a[3])) + a[4] + k[t/20] + w[s];
+ memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
+ a[0] = temp;
+ a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
+
+ /* debug dump */
+ DEBUG_S("\r\nt = "); DEBUG_B(t);
+ DEBUG_S("; a[]: ");
+ #ifdef DEBUG
+ uart_hexdump(a, 5*4);
+ #endif
+ DEBUG_S("; k = ");
+ #ifdef DEBUG
+ uart_hexdump(&(k[t/20]), 4);
+ #endif
+ DEBUG_S("; f(b,c,d) = ");
+ #ifdef DEBUG
+ uart_hexdump(&dtemp, 4);
+ #endif
+ }
+
+ /* update the state */
+ for(t=0; t<5; ++t){
+ state->h[t] += a[t];
+ }
+ state->length += 512;
+}
+
+/********************************************************************************************************/
+
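+/**
+ * \brief pad the final block and hash it
+ * length is the number of bits in this last block; the block must not be
+ * larger than 64 bytes (the sha1() driver below always passes fewer than
+ * 512 bits here)
+ */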
+void sha1_lastBlock(sha1_ctx_t *state, void* block, uint16_t length){
+ uint8_t lb[SHA1_BLOCK_BITS/8]; /* local block */
+ state->length += length;
+ memcpy (&(lb[0]), block, length/8);
+
+ /* set the final one bit */
+ if (length & 0x7){ /* if we have single bits at the end */
+ lb[length/8] = ((uint8_t*)(block))[length/8];
+ } else {
+ lb[length/8] = 0;
+ }
+ lb[length/8] |= 0x80>>(length & 0x7);
+ length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/
+ /* pad with zeros */
+ if (length>64-8){ /* not enough space for the 64-bit length value */
+ memset((void*)(&(lb[length])), 0, 64-length);
+ sha1_nextBlock(state, lb);
+ state->length -= 512;
+ length = 0;
+ }
+ memset((void*)(&(lb[length])), 0, 56-length);
+ /* store the 64bit length value */
+#if defined LITTLE_ENDIAN
+ /* this is now rolled up */
+ uint8_t i;
+ for (i=1; i<=8; ++i){
+ lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
+ }
+#elif defined BIG_ENDIAN
+ *((uint64_t)&(lb[56])) = state->length;
+#endif
+ sha1_nextBlock(state, lb);
+}
+
+/********************************************************************************************************/
+
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state){
+#if defined LITTLE_ENDIAN
+ uint8_t i;
+ for(i=0; i<8; ++i){
+ ((uint32_t*)dest)[i] = change_endian32(state->h[i]);
+ }
+#elif defined BIG_ENDIAN
+ if (dest != state->h)
+ memcpy(dest, state->h, SHA1_HASH_BITS/8);
+#else
+# error unsupported endian type!
+#endif
+}
+
+/********************************************************************************************************/
+/**
+ * \brief calculate the SHA-1 hash of a message located entirely in RAM
+ * length is the message length in bits
+ */
+void sha1 (sha1_hash_t *dest, void* msg, uint32_t length){
+ sha1_ctx_t s;
+ DEBUG_S("\r\nBLA BLUB");
+ sha1_init(&s);
+ while(length & (~0x0001ff)){ /* length>=512 */
+ DEBUG_S("\r\none block");
+ sha1_nextBlock(&s, msg);
+ msg += SHA1_BLOCK_BITS/8; /* increment pointer to next block */
+ length -= SHA1_BLOCK_BITS;
+ }
+ sha1_lastBlock(&s, msg, length);
+ sha1_ctx2hash(dest, &s);
+}
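+
+/* rough usage sketch (the "abc" test vector from FIPS 180-2; the exact
+ * sha1_hash_t type is declared in sha1.h and holds the 20 digest bytes):
+ *
+ *   uint8_t digest[SHA1_HASH_BITS/8];
+ *   sha1((sha1_hash_t*)digest, "abc", 3*8);
+ *   // expected: a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d
+ */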
+
+