out _SFR_IO_ADDR(SPL), \reg1
.endm
+
+.macro stack_alloc_large size:req, reg1=r30, reg2=r31 /* reserve size bytes on the stack by lowering SP; clobbers r0, reg1, reg2 */
+ in r0, _SFR_IO_ADDR(SREG) /* keep the caller's SREG (incl. interrupt flag) in r0 */
+ in \reg1, _SFR_IO_ADDR(SPL) /* reg2:reg1 = current stack pointer */
+ in \reg2, _SFR_IO_ADDR(SPH)
+ subi \reg1, lo8(\size) /* 16-bit subtract of the constant size */
+ sbci \reg2, hi8(\size)
+ cli /* no interrupt may observe a half-updated SP */
+ out _SFR_IO_ADDR(SPH), \reg2
+ out _SFR_IO_ADDR(SREG), r0 /* restore SREG before the low byte is written; same order as the usual avr-gcc SP update sequence */
+ out _SFR_IO_ADDR(SPL), \reg1 /* on exit reg2:reg1 still hold the new SP value */
+.endm
+
+/*
+ * stack_free_large size [, reg1 [, reg2]]
+ *
+ * Releases size bytes of stack by adding the constant to SP.
+ * Counterpart of stack_alloc_large.
+ *
+ *   size : number of bytes to release (any 16-bit constant)
+ *   reg1 : scratch register for the low SP byte  (must be r16..r31)
+ *   reg2 : scratch register for the high SP byte (must be r16..r31)
+ *
+ * Clobbers r0, reg1 and reg2. SREG is read into r0 before the arithmetic
+ * and written back afterwards, so the caller's flags and interrupt
+ * enable bit are unchanged on exit.
+ *
+ * The addition is done as a subtraction of the negated constant
+ * (subi/sbci), because AVR has no add-immediate instruction for
+ * arbitrary 16-bit values; adiw is limited to 0..63 per instruction.
+ */
+.macro stack_free_large size:req, reg1=r30, reg2=r31
+ in r0, _SFR_IO_ADDR(SREG)
+ in \reg1, _SFR_IO_ADDR(SPL)
+ in \reg2, _SFR_IO_ADDR(SPH)
+ subi \reg1, lo8(-(\size)) /* reg2:reg1 += size */
+ sbci \reg2, hi8(-(\size))
+ cli /* no interrupt may observe a half-updated SP */
+ out _SFR_IO_ADDR(SPH), \reg2
+ out _SFR_IO_ADDR(SREG), r0 /* restore SREG before the low byte is written */
+ out _SFR_IO_ADDR(SPL), \reg1
+.endm
+
+
+
/*******************************************************************************
* END of MACRO SECTION *
*******************************************************************************/
md5_core_asm:
- push r28
- push r29
- push_range 4, 17
+ push r16
+ push r17
+ push_range 4, 8
ldi r30, lo8(T_table)
ldi r31, hi8(T_table)
lsl P_I
st X+, r24
st X , r25
md5_core_exit:
- pop_range 4, 17
- pop r29
- pop r28
+ pop_range 4, 8
+ pop r17
+ pop r16
ret
;###################################################################
.global md5_nextBlock
md5_nextBlock:
stack_alloc 16
- push_range 2, 8
- push r16
- push r17
+ push_range 2, 17
+ push r28
+ push r29
push r24
push r25
adiw r30, 1 /* Z now points to the beginning of the allocated memory */
st X+, r0
2:
- pop r17
- pop r16
- pop_range 2, 8
+ pop r29
+ pop r28
+ pop_range 2, 17
stack_free 16
ret
-
-
-
-
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+ uint16_t l;
+ uint8_t b[64];
+ while (length_b >= 512){
+ md5_nextBlock(state, block);
+ length_b -= 512;
+ block = ((uint8_t*)block) + 512/8;
+ }
+ memset(b, 0, 64);
+ memcpy(b, block, length_b/8);
+ / * insert padding one * /
+ l=length_b/8;
+ if(length_b%8){
+ uint8_t t;
+ t = ((uint8_t*)block)[l];
+ t |= (0x80>>(length_b%8));
+ b[l]=t;
+ }else{
+ b[l]=0x80;
+ }
+ / * insert length value * /
+ if(l+sizeof(uint64_t) >= 512/8){
+ md5_nextBlock(state, b);
+ state->counter--;
+ memset(b, 0, 64-8);
+ }
+ *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+ md5_nextBlock(state, b);
+}
+*/
+; md5_lastBlock: hash the final, possibly partial, message block.
+; Copies the remaining message bytes into a 64-byte buffer on the stack,
+; appends the single 1-bit and the zero padding, stores the 64-bit
+; message length in the last 8 bytes and feeds the result to
+; md5_nextBlock. If the length field does not fit, the padded block is
+; hashed first and a second block carrying only the length follows.
+;
+; Arguments on entry:
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b : r20,r21 (length of the remaining message in bits)
+;
+; Register use inside the routine:
+; r12,r13 : state_ptr
+; r14,r15 : block_ptr, later the address of the stack buffer
+; r16,r17 : length_b (reduced by 512 for every full block consumed)
+; r18 : number of buffer bytes still to be filled
+; T flag : set when the length field fits into the current block
+; r12..r17 are saved and restored here. r1 is used as a zero source
+; (assumed to hold 0 as in the avr-gcc convention - confirm that
+; md5_nextBlock leaves it that way).
+.global md5_lastBlock
+md5_lastBlock:
+ stack_alloc_large 64 /* Z = SP afterwards, buffer is at Z+1 .. Z+64 */
+ push_range 12, 17
+ push r30
+ push r31
+ movw r16, r20 /* length_b */
+ movw r14, r22 /* block_ptr */
+ movw r12, r24 /* state_ptr */
+
+1: /* consume complete 512-bit blocks while length_b >= 512 */
+ cpi r17, 2 /* hi8(512) */
+ brlo 2f /* unsigned test: fewer than 512 bits left */
+ movw r24, r12
+ movw r22, r14
+ rcall md5_nextBlock
+ ldi r18, 64
+ add r14, r18 /* block_ptr += 64 */
+ adc r15, r1
+ subi r17, 2 /* length_b -= 512 */
+ rjmp 1b
+2:
+ pop r31
+ pop r30
+
+ adiw r30, 1 /* Z = first byte of the stack buffer */
+ movw r26, r14 /* X = source */
+ movw r24, r16
+ adiw r24, 7
+
+ lsr r25 /* (length_b + 7) / 8, the first two shifts on all 16 bits */
+ ror r24
+ lsr r25
+ ror r24
+ lsr r24 /* r24 now holds how many bytes are to copy (0..64) */
+ ldi r18, 64
+ sub r18, r24 /* r18 = bytes left in the buffer after the copy */
+ tst r24
+4:
+ breq 5f
+ ld r0, X+
+ st Z+, r0
+ dec r24
+ rjmp 4b
+5: /* append 1-bit */
+ mov r20, r16
+ andi r20, 0x07 /* number of valid bits in the last byte */
+ brne md5_lastBlock_partial_byte
+ ldi r19, 0x80 /* byte aligned: the padding bit gets its own byte */
+ st Z+, r19
+ dec r18
+ rjmp md5_lastBlock_padbit_done
+md5_lastBlock_partial_byte:
+ ldi r19, 0x80
+1:
+ lsr r19 /* r19 = 0x80 >> (length_b % 8) */
+ dec r20
+ brne 1b
+ or r0, r19 /* r0 still holds the last byte that was copied */
+ st -Z, r0
+ adiw r30, 1
+md5_lastBlock_padbit_done:
+ clt
+ cpi r18, 8
+ brmi 2f
+ set /* store in t if the counter will also fit in this block */
+2:
+ tst r18
+ breq 2f
+1: /* zero the rest of the buffer */
+ st Z+, r1
+ dec r18
+ brne 1b
+2:
+ sbiw r30, 63 /* Z is at buffer+64, step back to the buffer start */
+ sbiw r30, 1
+ movw r14, r30 /* r14,r15 = buffer address from here on */
+ brts md5_lastBlock_load_counter
+ movw r24, r12 /* no room for the length: hash the padded block ... */
+ movw r22, r14
+ rcall md5_nextBlock
+ movw r30, r14
+ ldi r20, 64-8 /* ... and start a fresh block, zeroing all but the length field */
+3:
+ st Z+, r1
+ dec r20
+ brne 3b
+
+md5_lastBlock_load_counter:
+ movw r26, r12
+ adiw r26, 16 /* X = &state->counter (follows the four 32-bit state words) */
+ ld r19, X+
+ ld r20, X+
+ ld r21, X+
+ ld r22, X+
+ brts md5_lastBlock_counter_ready
+ subi r19, 1 /* the extra padding block must not be counted as message data */
+ sbci r20, 0
+ sbci r21, 0
+ sbci r22, 0
+md5_lastBlock_counter_ready:
+ clr r18 /* r23..r18 = counter * 512: one byte offset plus one left shift */
+ clr r23
+ lsl r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ add r18, r16 /* + length_b */
+ adc r19, r17
+ adc r20, r1
+ adc r21, r1
+ adc r22, r1
+ adc r23, r1
+ movw r30, r14
+ adiw r30, 64-8 /* Z = length field, the last 8 bytes of the buffer */
+ st Z+, r18
+ st Z+, r19
+ st Z+, r20
+ st Z+, r21
+ st Z+, r22
+ st Z+, r23
+ st Z+, r1
+ st Z, r1
+
+ sbiw r30, 63 /* Z is at buffer+63, step back to the buffer start */
+ movw r24, r12
+ movw r22, r30
+ rcall md5_nextBlock
+md5_lastBlock_exit:
+ pop_range 12, 17
+ stack_free_large 64
+ ret
#undef DEBUG
-void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi);
-/*
-#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))
-
-static
-void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
- uint32_t t;
- md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
- as &= 0x3;
- // * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
-#ifdef DEBUG
- char funcc[]={'*', '-', '+', '~'};
- uart_putstr("\r\n DBG: md5_core [");
- uart_putc(funcc[fi]);
- uart_hexdump(&as, 1); uart_putc(' ');
- uart_hexdump(&k, 1); uart_putc(' ');
- uart_hexdump(&s, 1); uart_putc(' ');
- uart_hexdump(&i, 1); uart_putc(']');
-#endif
- t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
- a[as]=a[(as+1)&3] + ROTL32(t, s);
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){
+ memcpy(dest, state->a, MD5_HASH_BYTES);
}
-*/
-
-#if 0
-void md5_nextBlock(md5_ctx_t *state, void* block){
- uint32_t a[4];
- uint8_t m,n,i=0;
- /* this requires other mixed sboxes */
-#ifdef DEBUG
- uart_putstr("\r\n DBG: md5_nextBlock: block:\r\n");
- uart_hexdump(block, 16); uart_putstr("\r\n");
- uart_hexdump(block+16, 16); uart_putstr("\r\n");
- uart_hexdump(block+32, 16); uart_putstr("\r\n");
- uart_hexdump(block+48, 16); uart_putstr("\r\n");
-#endif
-
- a[0]=state->a[0];
- a[1]=state->a[1];
- a[2]=state->a[2];
- a[3]=state->a[3];
- /* round 1 */
- uint8_t s1t[]={7,12,17,22}; // 1,-1 1,4 2,-1 3,-2
- for(m=0;m<4;++m){
- for(n=0;n<4;++n){
- md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
- }
- }
- /* round 2 */
- uint8_t s2t[]={5,9,14,20}; // 1,-3 1,1 2,-2 2,4
- for(m=0;m<4;++m){
- for(n=0;n<4;++n){
- md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
- }
- }
- /* round 3 */
- uint8_t s3t[]={4,11,16,23}; // 0,4 1,3 2,0 3,-1
- for(m=0;m<4;++m){
- for(n=0;n<4;++n){
- md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
- }
- }
- /* round 4 */
- uint8_t s4t[]={6,10,15,21}; // 1,-2 1,2 2,-1 3,-3
- for(m=0;m<4;++m){
- for(n=0;n<4;++n){
- md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
- }
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){
+ md5_ctx_t ctx;
+ md5_init(&ctx);
+ while(length_b>=MD5_BLOCK_BITS){
+ md5_nextBlock(&ctx, msg);
+ msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
+ length_b -= MD5_BLOCK_BITS;
}
- state->a[0] += a[0];
- state->a[1] += a[1];
- state->a[2] += a[2];
- state->a[3] += a[3];
- state->counter++;
+ md5_lastBlock(&ctx, msg, length_b);
+ md5_ctx2hash(dest, &ctx);
}
-#endif
-
-void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
- uint16_t l;
- uint8_t b[64];
- while (length_b >= 512){
- md5_nextBlock(state, block);
- length_b -= 512;
- block = ((uint8_t*)block) + 512/8;
- }
- memset(b, 0, 64);
- memcpy(b, block, length_b/8);
- /* insert padding one */
- l=length_b/8;
- if(length_b%8){
- uint8_t t;
- t = ((uint8_t*)block)[l];
- t |= (0x80>>(length_b%8));
- b[l]=t;
- }else{
- b[l]=0x80;
- }
- /* insert length value */
- if(l+sizeof(uint64_t) >= 512/8){
- md5_nextBlock(state, b);
- state->counter--;
- memset(b, 0, 64);
- }
- *((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
- md5_nextBlock(state, b);
-}
-
a[as]=a[(as+1)&3] + ROTL32(t, s);
}
-void md5_nextBlock(md5_ctx_t *state, void* block){
+void md5_nextBlock(md5_ctx_t *state, const void* block){
uint32_t a[4];
uint8_t m,n,i=0;
/* this requires other mixed sboxes */
state->counter++;
}
-void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length_b){
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
uint16_t l;
uint8_t b[64];
while (length_b >= 512){
if(l+sizeof(uint64_t) >= 512/8){
md5_nextBlock(state, b);
state->counter--;
- memset(b, 0, 64);
+ memset(b, 0, 64-8);
}
*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
md5_nextBlock(state, b);
}
+
+/*
+ * Writes the digest held in a context to dest.
+ * The MD5_HASH_BYTES bytes of the state words state->a are copied
+ * unchanged, in memory order.
+ */
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){
+ const void* state_words = state->a;
+
+ memcpy(dest, state_words, MD5_HASH_BYTES);
+}
+
+/*
+ * One-shot MD5 of a message held completely in memory.
+ *
+ * dest     : receives the MD5_HASH_BYTES byte digest
+ * msg      : start of the message
+ * length_b : message length in bits
+ *
+ * Every complete block goes through md5_nextBlock; whatever is left
+ * (less than MD5_BLOCK_BITS bits) is handed to md5_lastBlock.
+ */
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){
+ md5_ctx_t ctx;
+ const uint8_t* cursor = (const uint8_t*)msg;
+ uint32_t bits_left = length_b;
+
+ md5_init(&ctx);
+ for(; bits_left >= MD5_BLOCK_BITS; bits_left -= MD5_BLOCK_BITS){
+  md5_nextBlock(&ctx, cursor);
+  cursor += MD5_BLOCK_BYTES;
+ }
+ md5_lastBlock(&ctx, cursor, bits_left);
+ md5_ctx2hash(dest, &ctx);
+}
+
#include <stdint.h>
+
+#define MD5_HASH_BITS 128 /* size of the digest in bits */
+#define MD5_HASH_BYTES (MD5_HASH_BITS/8) /* size of the digest in bytes */
+#define MD5_BLOCK_BITS 512 /* size of one input block in bits */
+#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8) /* size of one input block in bytes */
+
+
typedef struct md5_ctx_st {
 uint32_t a[4]; /* state words; md5_ctx2hash copies these out as the digest */
 uint32_t counter; /* incremented by md5_nextBlock for every block; md5_lastBlock derives the message bit length from it */
} md5_ctx_t;
+typedef uint8_t md5_hash_t[MD5_HASH_BYTES]; /* buffer type holding one digest */
+
void md5_init(md5_ctx_t *s);
-void md5_nextBlock(md5_ctx_t *state, void* block);
-void md5_lastBlock(md5_ctx_t *state, void* block, uint16_t length);
+void md5_nextBlock(md5_ctx_t *state, const void* block); /* processes one complete block */
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length); /* length is given in bits; pads and processes the final block */
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state); /* copies the digest out of the context */
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b); /* one-shot hash of a whole message; length_b is in bits */
#endif /*MD5_H_*/
*/
void testrun_md5(void){
- md5_ctx_t s;
+ md5_hash_t hash;
char* testv[]={
"",
"a",
uart_putstr("\r\n MD5 (\"");
uart_putstr(testv[i]);
uart_putstr("\") = \r\n\t");
- md5_init(&s);
- md5_lastBlock(&s, testv[i], strlen(testv[i])*8);
- uart_hexdump(&(s.a[0]), 16);
+ md5(&hash, testv[i], strlen(testv[i])*8);
+ uart_hexdump(hash, 16);
}
}