3 This file is part of the Crypto-avr-lib/microcrypt-lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * Author: Daniel Otte
\r
24 ; SHA1 implementation in assembler for AVR
\r
25 SHA1_BLOCK_BITS = 512 ; size of one SHA-1 message block in bits (512/8 = 64 bytes)
\r
26 SHA1_HASH_BITS = 160 ; size of the SHA-1 hash value in bits (five 32-bit words h0..h4 = 20 bytes)
\r
29 /* push r18 - r27, r30 - r31*/
\r
65 .macro hexdump length
\r
88 ldi r22, lo8(\length)
\r
89 ldi r23, hi8(\length)
\r
108 /* X points to Block */
\r
109 .macro dbg_hexdump length
\r
129 ; [h0][h1][h2][h3][h4][length]
\r
130 ; each hn is 32 bits wide, length is 64 bits wide
\r
132 ;###########################################################
\r
134 .global sha1_ctx2hash
\r
135 ; === sha1_ctx2hash ===
\r
136 ; this function converts a state into a normal hash (bytestring)
\r
137 ; param1: the 16-bit destination pointer
\r
138 ; given in r25,r24 (r25 is most significant)
\r
139 ; param2: the 16-bit pointer to sha1_ctx structure
\r
160 ;###########################################################
\r
164 ; this function calculates SHA-1 hashes from messages in RAM
\r
165 ; param1: the 16-bit hash destination pointer
\r
166 ; given in r25,r24 (r25 is most significant)
\r
167 ; param2: the 16-bit pointer to message
\r
169 ; param3: 32-bit length value (length of message in bits)
\r
170 ; given in r21,r20,r19,r18
\r
196 movw r8, r18 /* backup of length*/
\r
199 movw r12, r22 /* backup pf msg-ptr */
\r
203 /* if length >= 512 */
\r
215 rcall sha1_nextBlock
\r
219 /* length -= 512 */
\r
230 rcall sha1_lastBlock
\r
235 rcall sha1_ctx2hash
\r
256 ;###########################################################
\r
259 ; block MUST NOT be larger than 64 bytes
\r
261 .global sha1_lastBlock
\r
262 ; === sha1_lastBlock ===
\r
263 ; this function does padding & Co. for calculating SHA-1 hashes
\r
264 ; param1: the 16-bit pointer to sha1_ctx structure
\r
265 ; given in r25,r24 (r25 is most significant)
\r
266 ; param2: a 16-bit pointer to the 64-byte block to hash
\r
268 ; param3: a 16-bit integer specifying the length of the block in bits
\r
270 sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1) ; = 512/8 + 1 = 65 bytes: one 64-byte block plus 1 extra byte
\r
275 brne sha1_lastBlock_prolog
\r
277 brne sha1_lastBlock_prolog
\r
282 rcall sha1_nextBlock
\r
289 sha1_lastBlock_prolog:
\r
290 /* allocate space on stack */
\r
295 sbci r31, hi8(64) /* ??? */
\r
301 adiw r30, 1 /* SP points to next free byte on stack */
\r
302 mov r18, r20 /* r20 = LSB(length) */
\r
306 bst r21, 0 /* may be we should explain this ... */
\r
307 bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
\r
310 movw r26, r22 /* X points to begin of msg */
\r
312 breq sha1_lastBlock_post_copy
\r
314 sha1_lastBlock_copy_loop:
\r
318 brne sha1_lastBlock_copy_loop
\r
319 sha1_lastBlock_post_copy:
\r
320 sha1_lastBlock_insert_stuffing_bit:
\r
324 and r19, r20 /* if we are in bitmode */
\r
325 breq 2f /* no bitmode */
\r
331 /* maybe we should do some ANDing here, just for safety */
\r
337 /* checking stuff here */
\r
340 rjmp sha1_lastBlock_insert_zeros
\r
342 /* unlucky case: we landed here */
\r
343 /* first we have to fill it up with zeros */
\r
362 rcall sha1_nextBlock
\r
370 /* now we should subtract 512 from length */
\r
372 adiw r26, 4*5+1 /* we can skip the lowest byte */
\r
384 ; clr r18 /* not necessary ;-) */
\r
385 /* reset Z pointer to begin of block */
\r
387 sha1_lastBlock_insert_zeros:
\r
390 breq sha1_lastBlock_insert_length
\r
393 st Z+, r1 /* r1 is still zero */
\r
397 ; rjmp sha1_lastBlock_epilog
\r
398 sha1_lastBlock_insert_length:
\r
399 movw r26, r24 /* X points to state */
\r
400 adiw r26, 5*4 /* X points to (state.length) */
\r
401 adiw r30, 8 /* Z points one after the last byte of block */
\r
418 rcall sha1_nextBlock
\r
420 sha1_lastBlock_epilog:
\r
424 adiw r30, 63 ; lo8(64)
\r
425 adiw r30, 1 ; hi8(64)
\r
435 ;###########################################################
\r
437 .global sha1_nextBlock
\r
438 ; === sha1_nextBlock ===
\r
439 ; this is the core function for calculating SHA-1 hashes
\r
440 ; param1: the 16-bit pointer to sha1_ctx structure
\r
441 ; given in r25,r24 (r25 is most significant)
\r
442 ; param2: a 16-bit pointer to the 64-byte block to hash
\r
444 sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit words for the w array + 5 32-bit words for the a array (84 bytes) + 1 extra 32-bit word = 88 bytes total; NOTE(review): the purpose of the extra word is not visible here - confirm
\r
465 /* byteorder: high number <--> high significance */
\r
467 ; initially, let's reserve some space for local vars
\r
468 /* replace push & pop by mem ops? */
\r
481 movw r18, r20 ;backup SP
\r
482 ; movw r26, r20 ; X points to free space on stack /* maybe removeable? */
\r
483 movw r30, r22 ; Z points to message
\r
484 subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
\r
485 sbci r21, hi8(sha1_nextBlock_localSpace)
\r
486 movw r26, r20 ; X points to free space on stack
\r
488 cli ; we want to be uninterrupted while updating SP
\r
494 push r19 /* push old SP on new stack */
\r
496 push r25 /* param1 will be needed later */
\r
498 /* load a[] with state */
\r
499 movw 28, r24 /* load pointer to state in Y */
\r
508 movw W1, r26 /* save pointer to w[0] */
\r
509 /* load w[] with endian fixed message */
\r
510 /* we might also use the change_endian32() function at the bottom */
\r
511 movw r30, r22 /* mv param2 (ponter to msg) to Z */
\r
526 ;clr LoopC /* LoopC is named t in FIPS 180-2 */
\r
528 sha1_nextBlock_mainloop:
\r
532 andi S, 0x3C /* S is a bytepointer so *4 */
\r
535 add r26, S /* X points at w[s] */
\r
562 brlt sha1_nextBlock_mainloop_core
\r
571 1: /* this might be "outsourced" to save the jump above */
\r
586 2: /* now we just hav to do a ROTL(T) and save T back */
\r
601 sha1_nextBlock_mainloop_core: /* ther core function; T=ROTL5(a) ....*/
\r
602 /* T already contains w[s] */
\r
604 sbiw r26, 4*1 /* X points at a[4] aka e */
\r
612 adc T4, tmp1 /* T = w[s]+e */
\r
613 sbiw r26, 4*5 /* X points at a[0] aka a */
\r
618 mov tmp1, F4 /* X points at a[1] aka b */
\r
632 adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
\r
634 /* now we have to do the conditional stuff */
\r
635 ldi r30, lo8(sha1_nextBlock_xTable)
\r
636 ldi r31, hi8(sha1_nextBlock_xTable)
\r
643 1: ldi r30, lo8(sha1_nextBlock_KTable)
\r
644 ldi r31, hi8(sha1_nextBlock_KTable)
\r
660 /* T = ROTL(a,5) + e + kt + w[s] */
\r
662 /* while Z-4 is currently pointing at kt ... */
\r
663 movw r28, r26 /* copy X in Y */
\r
664 adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */
\r
679 adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
\r
680 /* X still points at a[1] aka b, Y points at a[2] aka c */
\r
682 sha1_nextBlock_update_a:
\r
683 /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
\r
684 //adiw r28, 3*4 /* Y should point at a[4] aka e */
\r
694 /* Y points at a[0] aka a*/
\r
698 /* store T in a[0] aka a */
\r
703 /* Y points at a[1] aka b*/
\r
735 jmp sha1_nextBlock_mainloop
\r
736 /**************************************/
\r
741 /* add a[] to state and inc length */
\r
743 pop r26 /* now X points to state (and Y still at a[0]) */
\r
756 /* now length += 512 */
\r
757 adiw r26, 1 /* we skip the least significant byte */
\r
759 ldi tmp2, hi8(512) /* 2 */
\r
771 sha1_nextBlock_epilog:
\r
772 /* now we should clean up the stack */
\r
776 cli ; we want to be uninterrupted while updating SP
\r
794 sha1_nextBlock_xTable:
\r
796 sha1_nextBlock_KTable:
\r
801 sha1_nextBlock_JumpTable:
\r
802 jmp sha1_nextBlock_Ch
\r
803 jmp sha1_nextBlock_Parity
\r
804 jmp sha1_nextBlock_Maj
\r
805 jmp sha1_nextBlock_Parity
\r
807 /* X and Y still point at a[1] aka b ; return value in tmp1 */
\r
812 ldd tmp3, Y+3 /* load from c */
\r
814 ldd tmp3, Y+7 /* load from d */
\r
819 ldi r24, lo8(ch_str)
\r
820 ldi r25, hi8(ch_str)
\r
826 sha1_nextBlock_Maj:
\r
829 ldd tmp3, Y+3 /* load from c */
\r
831 ldd tmp4, Y+7 /* load from d */
\r
838 ldi r24, lo8(maj_str)
\r
839 ldi r25, hi8(maj_str)
\r
845 sha1_nextBlock_Parity:
\r
847 ldd tmp2, Y+3 /* load from c */
\r
849 ldd tmp2, Y+7 /* load from d */
\r
854 ldi r24, lo8(parity_str)
\r
855 ldi r25, hi8(parity_str)
\r
861 ch_str: .asciz "\r\nCh"
\r
862 maj_str: .asciz "\r\nMaj"
\r
863 parity_str: .asciz "\r\nParity"
\r
865 ;###########################################################
\r
868 ;void sha1_init(sha1_ctx_t *state){
\r
869 ; DEBUG_S("\r\nSHA1_INIT");
\r
870 ; state->h[0] = 0x67452301;
\r
871 ; state->h[1] = 0xefcdab89;
\r
872 ; state->h[2] = 0x98badcfe;
\r
873 ; state->h[3] = 0x10325476;
\r
874 ; state->h[4] = 0xc3d2e1f0;
\r
875 ; state->length = 0;
\r
877 ; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram
\r
878 ; modifies: Z(r30,r31), Func1, r22
\r
880 movw r26, r24 ; (24,25) --> (26,27) load X with param1
\r
881 ldi r30, lo8((sha1_init_vector))
\r
882 ldi r31, hi8((sha1_init_vector))
\r
883 ldi r22, 5*4 /* bytes to copy */
\r
888 brne sha1_init_vloop
\r
890 clr r1 /* this should not be needed */
\r
894 brne sha1_init_lloop
\r
904 ;###########################################################
\r
908 ; function that rotates a 32 bit word to the left
\r
909 ; param1: the 32-bit word to rotate
\r
910 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
911 ; param2: an 8-bit value telling how often to rotate
\r
913 ; modifies: r21, r22
\r
942 ;###########################################################
\r
946 ; function that rotates a 32 bit word to the right
\r
947 ; param1: the 32-bit word to rotate
\r
948 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
949 ; param2: an 8-bit value telling how often to rotate
\r
951 ; modifies: r21, r22
\r
980 ;###########################################################
\r
982 .global change_endian32
\r
983 ; === change_endian32 ===
\r
984 ; function that changes the endianness of a 32-bit word
\r
985 ; param1: the 32-bit word
\r
986 ; given in r25,r24,r23,r22 (r25 is most significant)
\r
987 ; modifies: r21, r22
\r
989 movw r20, r22 ; (r22,r23) --> (r20,r21)
\r