X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=md5-asm.S;h=de3b170092330281674357475131a58019413d22;hb=5ef100e38af8134b518c1307f17ad21dfd46f5dd;hp=ee5f94243819055ffffcca5b358282e1a033e4c2;hpb=5ac75cfae217122b540c1a6d258054230dc534c3;p=avr-crypto-lib.git

diff --git a/md5-asm.S b/md5-asm.S
index ee5f942..de3b170 100644
--- a/md5-asm.S
+++ b/md5-asm.S
@@ -1,6 +1,6 @@
 /* md5-asm.S */
 /*
-    This file is part of the Crypto-avr-lib/microcrypt-lib.
+    This file is part of the AVR-Crypto-Lib.
     Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)

     This program is free software: you can redistribute it and/or modify
@@ -22,7 +22,8 @@
  * Date: 2008-11-15
  */

-.include "avr-asm-macros.S"
+
+#include "avr-asm-macros.S"

 ;###########################################################
 ; S-BOX
@@ -300,9 +301,9 @@ ARG_Z3 = 17


 md5_core_asm:
-	push r28
-	push r29
-	push_range 4, 17
+	push r16
+	push r17
+	push_range 4, 8
 	ldi r30, lo8(T_table)
 	ldi r31, hi8(T_table)
 	lsl P_I
@@ -444,9 +445,9 @@ fixrotl:
 	st X+, r24
 	st X , r25
 md5_core_exit:
-	pop_range 4, 17
-	pop r29
-	pop r28
+	pop_range 4, 8
+	pop r17
+	pop r16
 	ret

 ;###################################################################
@@ -495,27 +496,482 @@ void md5_nextBlock(md5_ctx_t *state, void* block){
 	state->counter++;
 }
 */
-/*
-shift_table:
-	.byte 7,12,17,22
-	.byte 5, 9,14,20
-	.byte 4,11,16,23
-	.byte 6,10,15,21
+shift_table_1: .byte 7,12,17,22 /* round 1 rotate amounts (RFC 1321 S11..S14), read with lpm, indexed by n */
+shift_table_2: .byte 5, 9,14,20 /* round 2 rotate amounts (RFC 1321 S21..S24), indexed by n */
+shift_table_3: .byte 4,11,16,23 /* round 3 rotate amounts (RFC 1321 S31..S34), indexed by n */
+shift_table_4: .byte 6,10,15,21 /* round 4 rotate amounts (RFC 1321 S41..S44), indexed by n */
+
+index_table_r2:
+;round 2: entry [m*4+n] = 4*((1+m*4+n*5)&0xf), i.e. the byte offset of the message word inside the 64-byte block
+	.byte 0x04, 0x18, 0x2c, 0x00
+	.byte 0x14, 0x28, 0x3c, 0x10
+	.byte 0x24, 0x38, 0x0c, 0x20
+	.byte 0x34, 0x08, 0x1c, 0x30
+
+index_table_r3:
+;round 3: entry [m*4+n] = 4*((5-m*4+n*3)&0xf), byte offset of the message word inside the block
+	.byte 0x14, 0x20, 0x2c, 0x38
+	.byte 0x04, 0x10, 0x1c, 0x28
+	.byte 0x34, 0x00, 0x0c, 0x18
+	.byte 0x24, 0x30, 0x3c, 0x08
+
+index_table_r4:
+;round 4: entry [m*4+n] = 4*((0-m*4+n*7)&0xf), byte offset of the message word inside the block
+	.byte 0x00, 0x1c, 0x38, 0x14
+	.byte 0x30, 0x0c, 0x28, 0x04
+	.byte 0x20, 0x3c, 0x18, 0x34
+	.byte 0x10, 0x2c, 0x08, 0x24
+
+APTR_REG = 2 /* r2:r3 - pointer to the 16-byte working copy of the state on the stack */
+BPTR_REG = 4 /* r4:r5 - pointer to the message block (copied from r22:r23) */
+N_REG = 6 /* inner loop index n, 0..3 */
+M_REG = 7 /* outer loop index m, 0..3 */
+I_REG = 8 /* running step number 0..63, handed to md5_core_asm in r19 */
+.global md5_nextBlock /* in: r24:r25 = state, r22:r23 = 64-byte block; updates 16 state bytes and the 32-bit counter at state+16 */
 md5_nextBlock:
-	stack_alloc 4*4
-
+	stack_alloc 16
+	push_range 2, 17
+	push r28
+	push r29
+	push r24 /* state pointer is kept on the stack until the final add-back */
+	push r25
+	adiw r30, 1 /* Z now points to the beginning of the allocated memory */
+	movw r2, r30
+	movw r4, r22
+	movw r26, r24
+	ldi r20, 16
+1:
+	ld r0, X+
+	st Z+, r0
+	dec r20
+	brne 1b
+	/* state now copied to stack memory */
+	clr I_REG
+	/* Round 1: message word index is m*4+n */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG /* r0 = m*4+n */
+	lsl r0
+	lsl r0 /* r0 = (m*4+n)*4, byte offset of the message word */
+	add r22, r0
+	adc r23, r1 /* relies on r1 == 0 */
+	mov r21, r16 /* r21 = 4-n */
+	ldi r30, lo8(shift_table_1)
+	ldi r31, hi8(shift_table_1)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z /* r20 = shift_table_1[n] */
+	mov r19, I_REG
+	ldi r18, 0 /* 0 in round 1; presumably selects the round function - md5_core_asm body is outside this view */
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 2: message word offset comes from index_table_r2[m*4+n] */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG
+	ldi r30, lo8(index_table_r2)
+	ldi r31, hi8(index_table_r2)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG /* r0 = m*4+n */
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z /* r0 = byte offset of the message word */
+	add r22, r0
+	adc r23, r1
+	mov r21, r16 /* r21 = 4-n */
+	ldi r30, lo8(shift_table_2)
+	ldi r31, hi8(shift_table_2)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z /* r20 = shift_table_2[n] */
+	mov r19, I_REG
+	ldi r18, 1
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 3: message word offset comes from index_table_r3[m*4+n] */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG
+	ldi r30, lo8(index_table_r3)
+	ldi r31, hi8(index_table_r3)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG /* r0 = m*4+n */
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z /* r0 = byte offset of the message word */
+	add r22, r0
+	adc r23, r1
+	mov r21, r16 /* r21 = 4-n */
+	ldi r30, lo8(shift_table_3)
+	ldi r31, hi8(shift_table_3)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z /* r20 = shift_table_3[n] */
+	mov r19, I_REG
+	ldi r18, 2
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 4: message word offset comes from index_table_r4[m*4+n] */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG
+	ldi r30, lo8(index_table_r4)
+	ldi r31, hi8(index_table_r4)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG /* r0 = m*4+n */
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z /* r0 = byte offset of the message word */
+	add r22, r0
+	adc r23, r1
+	mov r21, r16 /* r21 = 4-n */
+	ldi r30, lo8(shift_table_4)
+	ldi r31, hi8(shift_table_4)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z /* r20 = shift_table_4[n] */
+	mov r19, I_REG
+	ldi r18, 3
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+
+	pop r27 /* pushed as r25 */
+	pop r26 /* X now points to the context */
+	movw r30, APTR_REG
+	ldi r16, 4 /* four 32-bit words: state[k] += working copy[k] */
+1:
+	ld r0, X
+	ld r2, Z+ /* r2 is free as scratch here: Z already holds the working-copy pointer */
+	add r0, r2
+	st X+, r0
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2 /* ld/st leave the carry untouched, so it ripples through the word */
+	st X+, r0
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0
+	dec r16
+	brne 1b
+	ld r0, X /* X is now at state+16: increment the 32-bit counter byte by byte */
+	inc r0
+	st X+, r0
+	brne 2f /* Z flag still comes from inc: stop unless this byte wrapped to 0 */
+	ld r0, X
+	inc r0
+	st X+, r0
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0
+2:
+	pop r29
+	pop r28
+	pop_range 2, 17
+	stack_free 16
+	ret
-	stack_free 4*4
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+	uint16_t l;
+	uint8_t b[64];
+	while (length_b >= 512){
+		md5_nextBlock(state, block);
+		length_b -= 512;
+		block = ((uint8_t*)block) + 512/8;
+	}
+	memset(b, 0, 64);
+	memcpy(b, block, length_b/8);
+	/ * insert padding one * /
+	l=length_b/8;
+	if(length_b%8){
+		uint8_t t;
+		t = ((uint8_t*)block)[l];
+		t |= (0x80>>(length_b%8));
+		b[l]=t;
+	}else{
+		b[l]=0x80;
+	}
+	/ * insert length value * /
+	if(l+sizeof(uint64_t) >= 512/8){
+		md5_nextBlock(state, b);
+		state->counter--;
+		memset(b, 0, 64-8);
+	}
+	*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+	md5_nextBlock(state, b);
+}
+*/
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b : r20,r21
+.global md5_lastBlock
+md5_lastBlock:
+	stack_alloc_large 64
+	push_range 12, 17
+	push r30 /* keep Z (buffer address - 1): md5_nextBlock overwrites r30:r31 */
+	push r31
+	movw r16, r20 /* length_b */
+	movw r14, r22 /* block_ptr */
+	movw r12, r24 /* state_ptr */
+2: /* while length_b >= 512: hash one full block */
+	cpi r17, 2 /* hi8(512) */
+	brlo 2f
+1:
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock
+	ldi r18, 64 /* must be loaded after the call: md5_nextBlock overwrites r18 */
+	add r14, r18
+	adc r15, r1
+	subi r17, 2
+	rjmp 2b
+2:
+	pop r31
+	pop r30
+
+	adiw r30, 1 /* adjust Z to point to buffer */
+	movw r26, r14
+	movw r24, r16
+	adiw r24, 7
+
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	lsr r24 /* r24 = (length_b+7)/8: bytes to copy, a partial last byte included */
+	ldi r18, 64
+	sub r18, r24 /* r18 will hold the amount of free bytes left in buffer */
+	tst r24
+4:
+	breq 5f
+	ld r0, X+
+	st Z+, r0
+	dec r24
+	rjmp 4b /* Z points to the byte after msg in buffer */
+5: /* append 1-bit */
+	mov r20, r16
+	ldi r19, 0x80
+	andi r20, 0x07 /* r20 = length_b % 8 */
+	brne bit_fucking
+	st Z+, r19
+	dec r18 /* 'allocate' another byte in buffer */
+	rjmp after_bit_fucking
+bit_fucking: /* message ends inside a byte; r0 still holds that last copied byte */
+1:
+	lsr r19
+	dec r20
+	brne 1b /* r19 = 0x80 >> (length_b % 8) */
+	or r0, r19
+	st -Z, r0
+	adiw r30, 1
+after_bit_fucking:
+	clt
+	cpi r18, 8
+	brmi 2f /* r18 is 0..64 here, so N is set exactly when r18 < 8 */
+	set /* store in t if the counter will also fit in this block (1 if fit)*/
+2:
+	tst r18
+	breq 2f
+1: /* fill remaining buffer with zeros */
+	st Z+, r1
+	dec r18
+	brne 1b
+2:
+	sbiw r30, 63 /* sbiw takes at most 63, so 64 is subtracted in two steps */
+	sbiw r30, 1
+	movw r14, r30 /* r14:r15 now points to buffer */
+	brts load_counter
+	/* counter does not fit, finalize this block */
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock
+	movw r30, r14
+	ldi r20, 64-8
+3:
+	st Z+, r1
+	dec r20
+	brne 3b
+
+load_counter:
+	movw r26, r12 /* X points to state */
+	adiw r26, 16
+	ld r19, X+ /* counter goes to r19..r22, one byte above r18: already counter*256 */
+	ld r20, X+
+	ld r21, X+
+	ld r22, X+
+	brts post_counter_decrement /* do not decrement because counter fits; NOTE(review): on the other path T must survive md5_nextBlock - confirm md5_core_asm leaves T alone */
+counter_decrement:
+	subi r19, 1 /* undo the increment done by the extra md5_nextBlock call (local copy only) */
+	sbci r20, 0
+	sbci r21, 0
+	sbci r22, 0
+post_counter_decrement:
+	clr r18
+	clr r23
+	lsl r19
+	rol r20
+	rol r21
+	rol r22
+	rol r23 /* r18..r23 = counter * 512 */
+	mov r18, r16 /* r16:r17 length_b */
+	add r19, r17
+	adc r20, r1
+	adc r21, r1
+	adc r22, r1
+	adc r23, r1 /* r18..r23 = counter * 512 + length_b */
+	movw r30, r14
+	adiw r30, 64-8
+	st Z+, r18
+	st Z+, r19
+	st Z+, r20
+	st Z+, r21
+	st Z+, r22
+	st Z+, r23
+	st Z+, r1
+	st Z, r1 /* no post-increment: Z stays at buffer+63 */
+
+	sbiw r30, 63
+;	sbiw r30, 1
+	movw r24, r12
+	movw r22, r30
+	rcall md5_nextBlock
+md5_lastBlock_exit:
+	pop_range 12, 17
+	stack_free_large 64
+	ret
-*/
+;###############################################################################
+.global md5_ctx2hash /* in: r24:r25 = dest, r22:r23 = source; copies 16 bytes */
+md5_ctx2hash:
+	movw r26, r24
+	movw r30, r22
+	ldi r22, 16 /* r22 is free as a byte counter once the pointer is in Z */
+1:
+	ld r0, Z+
+	st X+, r0
+	dec r22
+	brne 1b
+	ret
+;###############################################################################
+
+
+.global md5 /* in: r24:r25 = dest, r22:r23 = msg, r18..r21 = 32-bit length in bits */
+md5:
+	stack_alloc 20
+	push_range 8, 17
+	adiw r30, 1
+	movw r8, r30 /* ctx */
+	movw r10, r24 /* dest */
+	movw r12, r22 /* msg */
+	movw r14, r18 /* length (low) */
+	movw r16, r20 /* length (high) */
+	movw r24, r30
+	rcall md5_init
+1: /* hash full blocks while the upper 16 bits of the length are non-zero */
+	tst r16
+	brne next_round
+	tst r17
+	breq last_round
+next_round:
+	movw r24, r8
+	movw r22, r12
+	rcall md5_nextBlock
+	ldi r22, 64
+	add r12, r22
+	adc r13, r1 /* msg += 64 */
+	ldi r22, 2
+	sub r15, r22 /* length -= 512; the low byte r14 is unaffected */
+	sbci r16, 0
+	sbci r17, 0
+	rjmp 1b
+last_round: /* remaining length fits in 16 bits; md5_lastBlock handles it, even if still >= 512 */
+	movw r24, r8
+	movw r22, r12
+	movw r20, r14
+	rcall md5_lastBlock
+	movw r24, r10
+	movw r22, r8
+	rcall md5_ctx2hash
+	pop_range 8, 17
+	stack_free 20
+	ret