2 * Author: Daniel Otte
\r
6 ; SHA1 implementation in assembler for AVR
\r
7 SHA1_BLOCK_BITS = 512
\r
11 /* push r18 - r27, r30 - r31*/
\r
47 .macro hexdump length
\r
70 ldi r22, lo8(\length)
\r
71 ldi r23, hi8(\length)
\r
90 /* X points to Block */
\r
91 .macro dbg_hexdump length
\r
111 ; [h0][h1][h2][h3][h4][length]
\r
112 ; hn is 32 bit large, length is 64 bit large
\r
114 ;###########################################################
\r
116 .global sha1_ctx2hash
\r
117 ; === sha1_ctx2hash ===
\r
118 ; this function converts a state into a normal hash (bytestring)
\r
119 ; param1: the 16-bit destination pointer
\r
120 ; given in r25,r24 (r25 is most significant)
\r
121 ; param2: the 16-bit pointer to sha1_ctx structure
\r
142 ;###########################################################
\r
146 ; this function calculates SHA-1 hashes from messages in RAM
\r
147 ; param1: the 16-bit hash destination pointer
\r
148 ; given in r25,r24 (r25 is most significant)
\r
149 ; param2: the 16-bit pointer to message
\r
151 ; param3: 32-bit length value (length of message in bits)
\r
152 ; given in r21,r20,r19,r18
\r
178 movw r8, r18 /* backup of length*/
\r
181 movw r12, r22 /* backup of msg-ptr */
\r
185 /* if length >= 512 */
\r
197 rcall sha1_nextBlock
\r
201 /* length -= 512 */
\r
212 rcall sha1_lastBlock
\r
217 rcall sha1_ctx2hash
\r
238 ;###########################################################
\r
241 ; block MUST NOT be larger than 64 bytes
\r
243 .global sha1_lastBlock
\r
244 ; === sha1_lastBlock ===
\r
245 ; this function does padding & Co. for calculating SHA-1 hashes
\r
246 ; param1: the 16-bit pointer to sha1_ctx structure
\r
247 ; given in r25,r24 (r25 is most significant)
\r
248 ; param2: a 16-bit pointer to the 64 byte block to hash
\r
250 ; param3: a 16-bit integer specifying the length of the block in bits
\r
252 sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
\r
257 brne sha1_lastBlock_prolog
\r
259 brne sha1_lastBlock_prolog
\r
264 rcall sha1_nextBlock
\r
271 sha1_lastBlock_prolog:
\r
272 /* allocate space on stack */
\r
277 sbci r31, hi8(64) /* ??? */
\r
283 adiw r30, 1 /* SP points to next free byte on stack */
\r
284 mov r18, r20 /* r20 = LSB(length) */
\r
288 bst r21, 0 /* T flag = bit 0 of r21 (presumably bit 8 of the length in bits - TODO confirm); the bld that follows copies it into r18 */
\r
289 bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
\r
292 movw r26, r22 /* X points to begin of msg */
\r
294 breq sha1_lastBlock_post_copy
\r
296 sha1_lastBlock_copy_loop:
\r
300 brne sha1_lastBlock_copy_loop
\r
301 sha1_lastBlock_post_copy:
\r
302 sha1_lastBlock_insert_stuffing_bit:
\r
306 and r19, r20 /* if we are in bitmode */
\r
307 breq 2f /* no bitmode */
\r
313 /* maybe we should do some ANDing here, just for safety */
\r
319 /* checking stuff here */
\r
322 rjmp sha1_lastBlock_insert_zeros
\r
324 /* bad luck: we landed here */
\r
325 /* first we have to fill it up with zeros */
\r
344 rcall sha1_nextBlock
\r
352 /* now we should subtract 512 from length */
\r
354 adiw r26, 4*5+1 /* we can skip the lowest byte */
\r
366 ; clr r18 /* not necessary ;-) */
\r
367 /* reset Z pointer to begin of block */
\r
369 sha1_lastBlock_insert_zeros:
\r
372 breq sha1_lastBlock_insert_length
\r
375 st Z+, r1 /* r1 is still zero */
\r
379 ; rjmp sha1_lastBlock_epilog
\r
380 sha1_lastBlock_insert_length:
\r
381 movw r26, r24 /* X points to state */
\r
382 adiw r26, 5*4 /* X points to (state.length) */
\r
383 adiw r30, 8 /* Z points one after the last byte of block */
\r
400 rcall sha1_nextBlock
\r
402 sha1_lastBlock_epilog:
\r
406 adiw r30, 63 ; lo8(64)
\r
407 adiw r30, 1 ; hi8(64)
\r
417 ;###########################################################
\r
419 .global sha1_nextBlock
\r
420 ; === sha1_nextBlock ===
\r
421 ; this is the core function for calculating SHA-1 hashes
\r
422 ; param1: the 16-bit pointer to sha1_ctx structure
\r
423 ; given in r25,r24 (r25 is most significant)
\r
424 ; param2: a 16-bit pointer to the 64 byte block to hash
\r
426 sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array, 5 32-bit values for a array and 1 additional 32-bit slot (total 88 byte)
\r
447 /* byteorder: high number <--> high significance */
\r
449 ; initial, let's make some space ready for local vars
\r
450 /* replace push & pop by mem ops? */
\r
463 movw r18, r20 ;backup SP
\r
464 ; movw r26, r20 ; X points to free space on stack /* maybe removable? */
\r
465 movw r30, r22 ; Z points to message
\r
466 subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
\r
467 sbci r21, hi8(sha1_nextBlock_localSpace)
\r
468 movw r26, r20 ; X points to free space on stack
\r
470 cli ; we want to be uninterrupted while updating SP
\r
476 push r19 /* push old SP on new stack */
\r
478 push r25 /* param1 will be needed later */
\r
480 /* load a[] with state */
\r
481 movw 28, r24 /* load pointer to state in Y */
\r
490 movw W1, r26 /* save pointer to w[0] */
\r
491 /* load w[] with endian fixed message */
\r
492 /* we might also use the changeendian32() function at bottom */
\r
493 movw r30, r22 /* mv param2 (pointer to msg) to Z */
\r
508 ;clr LoopC /* LoopC is named t in FIPS 180-2 */
\r
510 sha1_nextBlock_mainloop:
\r
514 andi S, 0x3C /* S is a bytepointer so *4 */
\r
517 add r26, S /* X points at w[s] */
\r
544 brlt sha1_nextBlock_mainloop_core
\r
553 1: /* this might be "outsourced" to save the jump above */
\r
568 2: /* now we just have to do a ROTL(T) and save T back */
\r
583 sha1_nextBlock_mainloop_core: /* the core function; T=ROTL5(a) ....*/
\r
584 /* T already contains w[s] */
\r
586 sbiw r26, 4*1 /* X points at a[4] aka e */
\r
594 adc T4, tmp1 /* T = w[s]+e */
\r
595 sbiw r26, 4*5 /* X points at a[0] aka a */
\r
600 mov tmp1, F4 /* X points at a[1] aka b */
\r
614 adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
\r
616 /* now we have to do the conditional stuff */
\r
617 ldi r30, lo8(sha1_nextBlock_xTable)
\r
618 ldi r31, hi8(sha1_nextBlock_xTable)
\r
625 1: ldi r30, lo8(sha1_nextBlock_KTable)
\r
626 ldi r31, hi8(sha1_nextBlock_KTable)
\r
642 /* T = ROTL(a,5) + e + kt + w[s] */
\r
644 /* since Z-4 currently points at kt ... */
\r
645 movw r28, r26 /* copy X in Y */
\r
646 adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
\r
662 adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
\r
663 /* X points still at a[1] aka b, Y points at a[2] aka c */
\r
665 sha1_nextBlock_update_a:
\r
666 /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
\r
667 //adiw r28, 3*4 /* Y should point at a[4] aka e */
\r
677 /* Y points at a[0] aka a*/
\r
681 /* store T in a[0] aka a */
\r
686 /* Y points at a[1] aka b*/
\r
718 jmp sha1_nextBlock_mainloop
\r
719 /**************************************/
\r
724 /* add a[] to state and inc length */
\r
726 pop r26 /* now X points to state (and Y still at a[0]) */
\r
739 /* now length += 512 */
\r
740 adiw r26, 1 /* we skip the least significant byte */
\r
742 ldi tmp2, hi8(512) /* 2 */
\r
754 sha1_nextBlock_epilog:
\r
755 /* now we should clean up the stack */
\r
759 cli ; we want to be uninterrupted while updating SP
\r
777 sha1_nextBlock_xTable:
\r
779 sha1_nextBlock_KTable:
\r
784 sha1_nextBlock_JumpTable:
\r
785 jmp sha1_nextBlock_Ch
\r
786 jmp sha1_nextBlock_Parity
\r
787 jmp sha1_nextBlock_Maj
\r
788 jmp sha1_nextBlock_Parity
\r
790 /* X and Y still point at a[1] aka b ; return value in tmp1 */
\r
795 ldd tmp3, Y+3 /* load from c */
\r
797 ldd tmp3, Y+7 /* load from d */
\r
802 ldi r24, lo8(ch_str)
\r
803 ldi r25, hi8(ch_str)
\r
809 sha1_nextBlock_Maj:
\r
812 ldd tmp3, Y+3 /* load from c */
\r
814 ldd tmp4, Y+7 /* load from d */
\r
821 ldi r24, lo8(maj_str)
\r
822 ldi r25, hi8(maj_str)
\r
828 sha1_nextBlock_Parity:
\r
830 ldd tmp2, Y+3 /* load from c */
\r
832 ldd tmp2, Y+7 /* load from d */
\r
837 ldi r24, lo8(parity_str)
\r
838 ldi r25, hi8(parity_str)
\r
844 ch_str: .asciz "\r\nCh"
\r
845 maj_str: .asciz "\r\nMaj"
\r
846 parity_str: .asciz "\r\nParity"
\r
848 ;###########################################################
\r
851 ;void sha1_init(sha1_ctx_t *state){
\r
852 ; DEBUG_S("\r\nSHA1_INIT");
\r
853 ; state->h[0] = 0x67452301;
\r
854 ; state->h[1] = 0xefcdab89;
\r
855 ; state->h[2] = 0x98badcfe;
\r
856 ; state->h[3] = 0x10325476;
\r
857 ; state->h[4] = 0xc3d2e1f0;
\r
858 ; state->length = 0;
\r
860 ; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram
\r
861 ; modifies: Z(r30,r31), Func1, r22
\r
863 movw r26, r24 ; (24,25) --> (26,27) load X with param1
\r
864 ldi r30, lo8((sha1_init_vector))
\r
865 ldi r31, hi8((sha1_init_vector))
\r
866 ldi r22, 5*4 /* bytes to copy */
\r
871 brne sha1_init_vloop
\r
873 clr r1 /* this should not be needed */
\r
877 brne sha1_init_lloop
\r
887 ;###########################################################
\r
891 ; function that rotates a 32 bit word to the left
\r
892 ; param1: the 32-bit word to rotate
\r
893 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
894 ; param2: an 8-bit value telling how often to rotate
\r
896 ; modifies: r21, r22
\r
925 ;###########################################################
\r
929 ; function that rotates a 32 bit word to the right
\r
930 ; param1: the 32-bit word to rotate
\r
931 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
932 ; param2: an 8-bit value telling how often to rotate
\r
934 ; modifies: r21, r22
\r
963 ;###########################################################
\r
965 .global change_endian32
\r
966 ; === change_endian32 ===
\r
967 ; function that changes the endianess of a 32-bit word
\r
968 ; param1: the 32-bit word
\r
969 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
970 ; modifies: r21, r22
\r
972 movw r20, r22 ; (r22,r23) --> (r20,r21)
\r