3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * License: GPLv3 or later
26 #include "avr-asm-macros.S"
28 ;###########################################################
; MD5 additive constants T[0..63] (the md5_T[i] term of the C reference for
; md5_core further down): 64 values of 32 bit each.
; Every constant is written as two 16-bit half-words, low half first
; (e.g. 0xd76aa478 -> 0xa478, 0xd76a), giving 128 half-words in total.
32 .hword 0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c
33 .hword 0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44
34 .hword 0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679
35 .hword 0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6
36 .hword 0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1
37 .hword 0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef
38 .hword 0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d
39 .hword 0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf
40 .hword 0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4
41 .hword 0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a
42 .hword 0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef
43 .hword 0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08
44 .hword 0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
51 ;###########################################################
52 ;void md5_init(md5_ctx_t *state)
53 ; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
54 ; modifies: Z(r30,r31), X(r26,r27)
55 ; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
57 movw r26, r24 ; (24,25) --> (26,27) load X with param1
58 ldi r30, lo8(md5_init_vector) ; Z = address of md5_init_vector (low byte)
59 ldi r31, hi8(md5_init_vector) ; Z = address of md5_init_vector (high byte)
76 ;###########################################################
78 ;void md5_init(md5_ctx_t *state)
79 ; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
81 ; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
82 ; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
124 ;###########################################################
128 uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
129 return ((x&y)|((~x)&z));
156 uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
157 return ((x&z)|((~z)&y));
184 uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
203 uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
204 return (y ^ (x | (~z)));
; One row per value of 'as' (1..4, see the ;as=N tag on each row).
; Each entry is a state-word index multiplied by 4; the index for each
; column is the expression in the first header line below, and the second
; header line names the column.
; NOTE(review): the *4 presumably turns the index into the byte offset of a
; 32-bit word inside a[] -- confirm against the code that reads this table.
233 ; (as+0)&3 (as+3)&3 (as+1)&3 (as+2)&3
235 ; AS_SAVE0 AS_SAVE1 AS_SAVE2 AS_SAVE3
236 .byte 1*4, 0*4, 2*4, 3*4 ;as=1
237 .byte 2*4, 1*4, 3*4, 0*4 ;as=2
238 .byte 3*4, 2*4, 0*4, 1*4 ;as=3
239 .byte 0*4, 3*4, 1*4, 2*4 ;as=4
241 ;###########################################################
250 void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
252 md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
254 / * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
255 t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
256 a[as]=a[(as+1)&3] + ROTL32(t, s);
307 ldi r30, lo8(T_table) ; Z = address of T_table
308 ldi r31, hi8(T_table)
317 /* loading T[i] into ACCU */
322 /* add *block to ACCU */
332 /* add a[(as+0)&3] to ACCU */
333 ldi r30, lo8(as_table) ; Z = address of as_table
334 ldi r31, hi8(as_table)
; NOTE(review): the adc ..., r1 instructions below presumably rely on r1
; holding 0 (avr-gcc zero register) so that only the carry is added -- confirm.
340 adc r31, r1 ; Z points to the correct row in as_table
345 movw r26, r24 ; X points to a[0]
347 adc r27, r1 ; X points at a[as&3]
359 /* loading z value */
368 /* loading x value */
377 /* loading y value */
; pm_lo8/pm_hi8 yield the program-memory word address of jump_table, which is
; the form ijmp expects in Z.
381 ldi r30, pm_lo8(jump_table)
382 ldi r31, pm_hi8(jump_table)
384 adc r31, r1 ; Z points to the correct entry in our jump table
390 ijmp /* jumps to the code Z points at; ijmp pushes no return address */
393 /* add ACCU to result of f() */
426 /* add a[(as+1)&3] */
453 ;###################################################################
455 void md5_nextBlock(md5_ctx_t *state, void* block){
465 uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
468 md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
472 uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
475 md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
479 uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
482 md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
486 uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
489 md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
; Left-rotate amounts, one table per round with four entries each.
; The values are the same as s1t..s4t in the C reference for md5_nextBlock.
500 shift_table_1: .byte 7,12,17,22
501 shift_table_2: .byte 5, 9,14,20
502 shift_table_3: .byte 4,11,16,23
503 shift_table_4: .byte 6,10,15,21
; Message-word offsets for rounds 2..4. Every entry equals the word index
; used by the C reference for md5_nextBlock, multiplied by 4.
; Within each group of four rows, rows are m = 0..3 and columns are n = 0..3.
; round 2: ((1 + m*4 + n*5) & 0xf) * 4
507 .byte 0x04, 0x18, 0x2c, 0x00
508 .byte 0x14, 0x28, 0x3c, 0x10
509 .byte 0x24, 0x38, 0x0c, 0x20
510 .byte 0x34, 0x08, 0x1c, 0x30
; round 3: ((5 - m*4 + n*3) & 0xf) * 4
514 .byte 0x14, 0x20, 0x2c, 0x38
515 .byte 0x04, 0x10, 0x1c, 0x28
516 .byte 0x34, 0x00, 0x0c, 0x18
517 .byte 0x24, 0x30, 0x3c, 0x08
; round 4: ((0 - m*4 + n*7) & 0xf) * 4
521 .byte 0x00, 0x1c, 0x38, 0x14
522 .byte 0x30, 0x0c, 0x28, 0x04
523 .byte 0x20, 0x3c, 0x18, 0x34
524 .byte 0x10, 0x2c, 0x08, 0x24
; void md5_nextBlock(md5_ctx_t *state, void* block) -- see the C reference above.
531 .global md5_nextBlock
; NOTE(review): Z was presumably loaded from SP, which on AVR points one byte
; below the last allocated byte, hence the +1 -- confirm.
539 adiw r30, 1 /* Z now points to the beginning of the allocated memory */
549 /* state now copied to stack memory */
; round 1: Z = rotate amounts (the C reference uses block word m*4+n here)
569 ldi r30, lo8(shift_table_1)
570 ldi r31, hi8(shift_table_1)
; round 2: Z = message-word offset table
594 ldi r30, lo8(index_table_r2)
595 ldi r31, hi8(index_table_r2)
; round 2: Z = rotate amounts
606 ldi r30, lo8(shift_table_2)
607 ldi r31, hi8(shift_table_2)
; round 3: Z = message-word offset table
631 ldi r30, lo8(index_table_r3)
632 ldi r31, hi8(index_table_r3)
; round 3: Z = rotate amounts
643 ldi r30, lo8(shift_table_3)
644 ldi r31, hi8(shift_table_3)
; round 4: Z = message-word offset table
668 ldi r30, lo8(index_table_r4)
669 ldi r31, hi8(index_table_r4)
; round 4: Z = rotate amounts
680 ldi r30, lo8(shift_table_4)
681 ldi r31, hi8(shift_table_4)
698 pop r26 /* X now points to the context */
744 ;###############################################################################
746 void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
749 while (length_b >= 512){
750 md5_nextBlock(state, block);
752 block = ((uint8_t*)block) + 512/8;
755 memcpy(b, block, length_b/8);
756 / * insert padding one * /
760 t = ((uint8_t*)block)[l];
761 t |= (0x80>>(length_b%8));
766 / * insert length value * /
767 if(l+sizeof(uint64_t) >= 512/8){
768 md5_nextBlock(state, b);
772 *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
773 md5_nextBlock(state, b);
776 ; state_ptr : r24,r25
777 ; block_ptr : r22,r23
; length_b  : r20,r21
779 .global md5_lastBlock
785 movw r16, r20 /* length_b */
786 movw r14, r22 /* block_ptr */
787 movw r12, r24 /* state_ptr */
790 cpi r17, 2 /* hi8(512): high byte of length_b decides whether length_b >= 512 */
804 adiw r30, 1 /* adjust Z to point to buffer */
813 lsr r24 /* r24 now holds how many bytes are to be copied */
815 sub r18, r24 /* r18 will hold the amount of used bytes in buffer */
822 rjmp 4b /* Z points to the byte after msg in buffer */
823 5: /* append 1-bit */
829 dec r18 /* 'allocate' another byte in buffer */
830 rjmp after_bit_fucking
843 set /* store in the T flag if the counter will also fit in this block (1 if fit) */
847 1: /* fill remaining buffer with zeros */
854 movw r14, r30 /* r14:r15 now points to buffer */
856 /* counter does not fit, finalize this block */
868 movw r26, r12 /* X points to state */
874 brts post_counter_decrement /* T set: do not decrement because counter fits */
880 post_counter_decrement:
888 mov r18, r16 /* r16:r17 length_b */
916 ;###############################################################################
932 ;###############################################################################
940 movw r8, r30 /* ctx */
941 movw r10, r24 /* dest */
942 movw r12, r22 /* msg */
943 movw r14, r18 /* length (low) */
944 movw r16, r20 /* length (high) */