/* md5-asm.S */
/*
- This file is part of the Crypto-avr-lib/microcrypt-lib.
+ This file is part of the AVR-Crypto-Lib.
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
* Date: 2008-11-15
*/
-.include "avr-asm-macros.S"
+
+#include "avr-asm-macros.S"
;###########################################################
; S-BOX
;-----------------------------------------------------------------------
; md5_core_asm -- single MD5 step kernel.
; NOTE(review): most of this routine's body is elided by this diff hunk;
; the register roles below are inferred from the call sites in
; md5_nextBlock and must be confirmed against the complete source:
;   r24:r25 = pointer to the 16-byte working state copy (a,b,c,d)
;   r22:r23 = pointer to the 32-bit message word used in this step
;   r21 = inner counter, r20 = rotate amount, r19 = step index i
;   (T-table index), r18 = round selector (0..3)
; Diff change: only r16/r17 and r4..r8 are saved now instead of
; r28/r29 plus r4..r17 (smaller clobber set after the rewrite).
md5_core_asm:
- push r28
- push r29
- push_range 4, 17
+ push r16
+ push r17
+ push_range 4, 8
ldi r30, lo8(T_table) ; Z -> table of MD5 sine-derived constants T[i]
ldi r31, hi8(T_table)
lsl P_I ; NOTE(review): P_I is defined in a part of the file not shown here
st X+, r24
st X , r25
md5_core_exit:
- pop_range 4, 17
- pop r29
- pop r28
+ pop_range 4, 8 ; restore in reverse push order
+ pop r17
+ pop r16
ret
;###################################################################
state->counter++;
}
*/
;-----------------------------------------------------------------------
; MD5 per-round constant tables (RFC 1321, section 3.4).
; Diff change: the single commented-out shift_table is replaced by four
; individually addressable per-round tables plus message-index tables.
-/*
-shift_table:
- .byte 7,12,17,22
- .byte 5, 9,14,20
- .byte 4,11,16,23
- .byte 6,10,15,21
; Left-rotate amounts for rounds 1..4; each table is indexed by the
; inner-loop counter n (0..3).
+shift_table_1: .byte 7,12,17,22
+shift_table_2: .byte 5, 9,14,20
+shift_table_3: .byte 4,11,16,23
+shift_table_4: .byte 6,10,15,21
+
; Message-word BYTE offsets (word index * 4) for rounds 2..4, indexed by
; m*4+n where i = m*4+n is the step inside the round. Round 1 consumes
; the message words in order, so it needs no table.
+index_table_r2:
+;(1+m*4+n*5)&0xf:
+ .byte 0x04, 0x18, 0x2c, 0x00
+ .byte 0x14, 0x28, 0x3c, 0x10
+ .byte 0x24, 0x38, 0x0c, 0x20
+ .byte 0x34, 0x08, 0x1c, 0x30
+
+index_table_r3:
+;(5-m*4+n*3)&0xf:
+ .byte 0x14, 0x20, 0x2c, 0x38
+ .byte 0x04, 0x10, 0x1c, 0x28
+ .byte 0x34, 0x00, 0x0c, 0x18
+ .byte 0x24, 0x30, 0x3c, 0x08
+
+index_table_r4:
+;(0-m*4+n*7)&0xf:
+ .byte 0x00, 0x1c, 0x38, 0x14
+ .byte 0x30, 0x0c, 0x28, 0x04
+ .byte 0x20, 0x3c, 0x18, 0x34
+ .byte 0x10, 0x2c, 0x08, 0x24
+
; Register aliases used by md5_nextBlock: pointer to the working state
; copy (r2:r3), pointer to the message block (r4:r5), the loop indices
; n and m, and the global step counter i.
+APTR_REG = 2
+BPTR_REG = 4
+N_REG = 6
+M_REG = 7
+I_REG = 8
;-----------------------------------------------------------------------
; void md5_nextBlock(md5_ctx_t *state, const void *block)
;   r24:r25 = state pointer, r22:r23 = pointer to a 64-byte message block
; Hashes one 512-bit block into the state (four rounds of 16 steps, each
; delegated to md5_core_asm), adds the working copy back onto the
; chaining value, and increments the 32-bit block counter stored at
; state+16.
+.global md5_nextBlock
md5_nextBlock:
- stack_alloc 4*4
-
+ stack_alloc 16 ; scratch copy of the 16-byte chaining value a,b,c,d
+ push_range 2, 17 ; save every call-saved register used below
+ push r28 ; Y is no longer saved by md5_core_asm, so keep it here
+ push r29
+ push r24 ; remember the context pointer for the final add-back
+ push r25
+ adiw r30, 1 /* Z now points to the beginning of the allocated memory */
+ movw r2, r30 ; APTR_REG -> working copy on the stack
+ movw r4, r22 ; BPTR_REG -> message block
+ movw r26, r24 ; X -> context
+ ldi r20, 16
+1: ; copy the 16 state bytes into the stack buffer
+ ld r0, X+
+ st Z+, r0
+ dec r20
+ brne 1b
+ /* state now copied to stack memory */
+ clr I_REG ; i = global step index 0..63 (selects T[i])
+ /* Round 1 */
+ clr M_REG ; m = outer loop counter 0..3
+ ldi r17, 4
+1:
+ clr N_REG ; n = inner loop counter 0..3
+ ldi r16, 4
+2:
+ movw r24, APTR_REG ; arg: working state
+ movw r22, BPTR_REG ; arg: message base ...
+ mov r0, M_REG ; round 1 uses word i = m*4+n,
+ lsl r0 ; so the byte offset is (m*4+n)*4
+ lsl r0
+ add r0, N_REG
+ lsl r0
+ lsl r0
+ add r22, r0 ; ... plus that word's byte offset
+ adc r23, r1 ; (r1 is the avr-gcc zero register)
+ mov r21, r16 ; arg: inner down-counter
+ ldi r30, lo8(shift_table_1)
+ ldi r31, hi8(shift_table_1)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z ; arg: rotate amount for this step
+ mov r19, I_REG ; arg: step index i
+ ldi r18, 0 ; arg: round selector
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 2 */ ; as round 1, but the word offset comes from index_table_r2
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r2)
+ ldi r31, hi8(index_table_r2)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG ; table index m*4+n
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z ; byte offset of this step's message word
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_2)
+ ldi r31, hi8(shift_table_2)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 1
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 3 */ ; same pattern with index_table_r3 / shift_table_3
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r3)
+ ldi r31, hi8(index_table_r3)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_3)
+ ldi r31, hi8(shift_table_3)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 2
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+ /* Round 4 */ ; same pattern with index_table_r4 / shift_table_4
+ clr M_REG
+ ldi r17, 4
+1:
+ clr N_REG
+ ldi r16, 4
+2:
+ movw r24, APTR_REG
+ movw r22, BPTR_REG
+ ldi r30, lo8(index_table_r4)
+ ldi r31, hi8(index_table_r4)
+ mov r0, M_REG
+ lsl r0
+ lsl r0
+ add r0, N_REG
+ add r30, r0
+ adc r31, r1
+ lpm r0, Z
+ add r22, r0
+ adc r23, r1
+ mov r21, r16
+ ldi r30, lo8(shift_table_4)
+ ldi r31, hi8(shift_table_4)
+ add r30, N_REG
+ adc r31, r1
+ lpm r20, Z
+ mov r19, I_REG
+ ldi r18, 3
+ rcall md5_core_asm
+ inc I_REG
+ inc N_REG
+ dec r16
+ brne 2b
+ inc M_REG
+ dec r17
+ brne 1b
+
+
+ pop r27 ; restore the context pointer pushed at entry
+ pop r26 /* X now points to the context */
+ movw r30, APTR_REG ; Z -> working copy; r2 is free as scratch from here on
+ ldi r16, 4 ; four 32-bit words: a, b, c, d
+1: ; context word += working word (32-bit little-endian add)
+ ld r0, X
+ ld r2, Z+
+ add r0, r2 ; lowest byte: plain add ...
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2 ; ... upper bytes: add with carry
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2
+ st X+, r0
+ ld r0, X
+ ld r2, Z+
+ adc r0, r2
+ st X+, r0
+ dec r16
+ brne 1b
+ ld r0, X ; increment the 32-bit block counter at context+16,
+ inc r0 ; rippling the carry byte by byte: stop as soon as a
+ st X+, r0 ; byte did not wrap to zero
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+ brne 2f
+ ld r0, X
+ inc r0
+ st X+, r0
+2:
+ pop r29
+ pop r28
+ pop_range 2, 17
+ stack_free 16
+ ret
- stack_free 4*4
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+ uint16_t l;
+ uint8_t b[64];
+ while (length_b >= 512){
+ md5_nextBlock(state, block);
+ length_b -= 512;
+ block = ((uint8_t*)block) + 512/8;
+ }
+ memset(b, 0, 64);
+ memcpy(b, block, length_b/8);
+ / * insert padding one * /
+ l=length_b/8;
+ if(length_b%8){
+ uint8_t t;
+ t = ((uint8_t*)block)[l];
+ t |= (0x80>>(length_b%8));
+ b[l]=t;
+ }else{
+ b[l]=0x80;
+ }
+ / * insert length value * /
+ if(l+sizeof(uint64_t) >= 512/8){
+ md5_nextBlock(state, b);
+ state->counter--;
+ memset(b, 0, 64-8);
+ }
+ *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+ md5_nextBlock(state, b);
+}
+*/
;-----------------------------------------------------------------------
; void md5_lastBlock(md5_ctx_t *state, const void *block, uint16_t length_b)
; Consumes any remaining full 512-bit blocks, then pads the final
; partial block (append 1-bit, zero fill, 64-bit little-endian bit
; length) and hashes it. See the commented C reference above.
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b : r20,r21
+.global md5_lastBlock
+md5_lastBlock:
+ stack_alloc_large 64 ; 64-byte padding buffer b[]
+ push_range 12, 17
+ push r30 ; preserve Z (buffer address) across the calls below
+ push r31
+ movw r16, r20 /* length_b */
+ movw r14, r22 /* block_ptr */
+ movw r12, r24 /* state_ptr */
+ ldi r18, 64 ; block size in bytes
+2: ; while (length_b >= 512) hash full blocks
+ cpi r17, 2 /* hi8(512) */
+ brlo 2f
+1:
+ movw r24, r12
+ movw r22, r14
+ rcall md5_nextBlock
+ add r14, r18 ; block += 64
+ adc r15, r1
+ subi r17, 2 ; length_b -= 512
+ rjmp 2b
+2:
+ pop r31
+ pop r30
+
+ adiw r30, 1 /* adjust Z to point to buffer */
+ movw r26, r14 ; X -> remaining message bytes
+ movw r24, r16 ; r24 = (length_b + 7) / 8:
+ adiw r24, 7 ; bytes to copy, including a trailing partial byte
+
+ lsr r25
+ ror r24
+ lsr r25
+ ror r24
+ lsr r24 /* r24 now holds how many bytes are to copy */
+ ldi r18, 64
+ sub r18, r24 /* r18 = number of free (unused) bytes left in buffer */
+ tst r24
+4: ; copy the message tail into the buffer
+ breq 5f
+ ld r0, X+
+ st Z+, r0
+ dec r24
+ rjmp 4b /* Z points to the byte after msg in buffer */
+5: /* append 1-bit */
+ mov r20, r16
+ ldi r19, 0x80
+ andi r20, 0x07 ; length_b % 8 == 0 means byte-aligned message
+ brne bit_fucking
+ st Z+, r19 ; aligned: padding byte 0x80 goes right after the message
+ dec r18 /* 'allocate' another byte in buffer */
+ rjmp after_bit_fucking
+bit_fucking: ; unaligned: OR 0x80>>(length_b%8) into the last copied byte
+1:
+ lsr r19
+ dec r20
+ brne 1b
+ or r0, r19 ; r0 still holds the last message byte from the copy loop
+ st -Z, r0
+ adiw r30, 1
+after_bit_fucking:
+ clt
+ cpi r18, 8
+ brmi 2f ; fewer than 8 free bytes: length field needs an extra block
+ set /* store in t if the counter will also fit in this block (1 if fit)*/
+2:
+ tst r18
+ breq 2f
+1: /* fill remaning buffer with zeros */
+ st Z+, r1
+ dec r18
+ brne 1b
+2:
+ sbiw r30, 63 ; rewind Z by 64 to the buffer start
+ sbiw r30, 1 ; (sbiw's immediate is limited to 63)
+ movw r14, r30 /* r14:r15 now points to buffer */
+ brts load_counter
+ /* counter does not fit, finalize this block */
+ movw r24, r12
+ movw r22, r14
+ rcall md5_nextBlock
+ movw r30, r14
+ ldi r20, 64-8 ; zero a fresh block, leaving room for the length field
+3:
+ st Z+, r1
+ dec r20
+ brne 3b
+
+load_counter:
+ movw r26, r12 /* X points to state */
+ adiw r26, 16 ; the 32-bit block counter lives at state+16
+ ld r19, X+
+ ld r20, X+
+ ld r21, X+
+ ld r22, X+
+ brts post_counter_decrement /* T set: no extra block was hashed above, counter is already correct */
+counter_decrement: ; undo the counter++ done by the extra md5_nextBlock call
+ subi r19, 1
+ sbci r20, 0
+ sbci r21, 0
+ sbci r22, 0
+post_counter_decrement:
+ clr r18
+ clr r23
+ lsl r19 ; counter * 512 = counter << 9: shift left one bit here,
+ rol r20 ; the one-byte shift comes from placing the result at
+ rol r21 ; r19.. while r18 holds byte 0
+ rol r22
+ rol r23
+ mov r18, r16 /* r16:r17 length_b */
+ add r19, r17 ; total bit count = counter*512 + length_b
+ adc r20, r1
+ adc r21, r1
+ adc r22, r1
+ adc r23, r1
+ movw r30, r14
+ adiw r30, 64-8 ; Z -> last 8 bytes of the buffer (length field)
+ st Z+, r18 ; store the 64-bit little-endian bit count;
+ st Z+, r19 ; the top two bytes are always zero
+ st Z+, r20
+ st Z+, r21
+ st Z+, r22
+ st Z+, r23
+ st Z+, r1
+ st Z, r1 ; no post-increment, so Z ends at buffer+63 ...
+
+ sbiw r30, 63 ; ... and a single sbiw gets back to the buffer start
+; sbiw r30, 1
+ movw r24, r12
+ movw r22, r30
+ rcall md5_nextBlock ; hash the final padded block
+md5_lastBlock_exit:
+ pop_range 12, 17
+ stack_free_large 64
+ ret
-*/
+;###############################################################################
;-----------------------------------------------------------------------
; void md5_ctx2hash(void *dest, const md5_ctx_t *state)
;   r24:r25 = dest, r22:r23 = state
; Copies the 16-byte digest out of the context.
+.global md5_ctx2hash
+md5_ctx2hash:
+ movw r26, r24 ; X -> dest
+ movw r30, r22 ; Z -> state
+ ldi r22, 16 ; 16 digest bytes
+1:
+ ld r0, Z+
+ st X+, r0
+ dec r22
+ brne 1b
+ ret
+;###############################################################################
+
+
;-----------------------------------------------------------------------
; void md5(void *dest, const void *msg, uint32_t length_b)
;   r24:r25 = dest (16-byte digest), r22:r23 = msg,
;   r18:r21 = message length in bits (r18 = least significant byte;
;   NOTE(review): inferred from the avr-gcc argument convention and the
;   register moves below -- confirm against the C prototype)
; Convenience wrapper: init a stack context, hash 512-bit blocks while
; more than 0xffff bits remain, then hand the 16-bit rest to
; md5_lastBlock and copy out the digest.
+.global md5
+md5:
+ stack_alloc 20 ; md5_ctx_t: 16-byte state + 32-bit block counter
+ push_range 8, 17
+ adiw r30, 1
+ movw r8, r30 /* ctx */
+ movw r10, r24 /* dest */
+ movw r12, r22 /* msg */
+ movw r14, r18 /* length (low) */
+ movw r16, r20 /* length (high) */
+ movw r24, r30
+ rcall md5_init
+1: ; loop while the high 16 bits of the bit length are non-zero;
+ tst r16 ; once they are zero the rest fits md5_lastBlock's uint16_t
+ brne next_round
+ tst r17
+ breq last_round
+next_round:
+ movw r24, r8
+ movw r22, r12
+ rcall md5_nextBlock
+ ldi r22, 64
+ add r12, r22 ; msg += 64 bytes
+ adc r13, r1
+ ldi r22, 2
+ sub r15, r22 ; length -= 512 (bit 9 of the length sits in r15)
+ sbci r16, 0
+ sbci r17, 0
+ rjmp 1b
+last_round: ; remaining length now fits in 16 bits
+ movw r24, r8
+ movw r22, r12
+ movw r20, r14
+ rcall md5_lastBlock
+ movw r24, r10
+ movw r22, r8
+ rcall md5_ctx2hash
+ pop_range 8, 17
+ stack_free 20
+ ret