/* ubi1024_asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/.
*/
/*
 * \author  Daniel Otte
 * \email   daniel.otte@rub.de
 * \date    2009-03-16
 * \license GPLv3 or later
 */

#include "avr-asm-macros.S"

/* context layout used below: 16 tweak bytes followed by 128 bytes of g */
UBI1024_TWEAK_B = 16
UBI1024_BLOCK_B = 128

/******************************************************************************/
/*
void ubi1024_init(ubi1024_ctx_t *ctx, const void *g, uint8_t type){
	memset(ctx->tweak, 0, 15);
	ctx->tweak[15] = 0x40+type;
	memcpy(ctx->g, g, UBI1024_BLOCKSIZE_B);
}
*/
/*
 * param ctx:  r24:r25
 * param g:    r22:r23
 * param type: r20
 *
 * scratch: r20, r21, X (r26:r27), Z (r30:r31)
 * r1 is stored as the zero byte (it is expected to hold 0 on entry).
 * The type is merged with `ori`, so it equals 0x40+type only while bit 6
 * of type is clear.
 */
.global ubi1024_init
ubi1024_init:
	movw r26, r24                  /* X = &ctx->tweak[0] */
	ldi  r21, UBI1024_TWEAK_B-1    /* clear tweak[0..14] */
.Lubi1024_init_zero:
	st   X+, r1
	dec  r21
	brne .Lubi1024_init_zero
	ori  r20, 0x40                 /* tweak[15] = type | 'first' flag */
	st   X+, r20                   /* X now points at ctx->g */
	movw r30, r22                  /* Z = g */
	ldi  r21, UBI1024_BLOCK_B
.Lubi1024_init_copy:
	ld   r20, Z+
	st   X+, r20
	dec  r21
	brne .Lubi1024_init_copy
	ret

/******************************************************************************/
/*
void ubi1024_ctx2hash(void *dest, const ubi1024_ctx_t *ctx){
	memcpy(dest, ctx->g, UBI1024_BLOCKSIZE_B);
}
*/
/*
 * param dest: r24:r25
 * param ctx:  r22:r23
 *
 * scratch: r22, r23, X (r26:r27), Z (r30:r31)
 */
.global ubi1024_ctx2hash
ubi1024_ctx2hash:
	movw r30, r22                  /* Z = ctx (taken before r22 is reused) */
	adiw r30, UBI1024_TWEAK_B      /* Z = ctx->g */
	movw r26, r24                  /* X = dest */
	ldi  r22, UBI1024_BLOCK_B
.Lubi1024_ctx2hash_copy:
	ld   r23, Z+
	st   X+, r23
	dec  r22
	brne .Lubi1024_ctx2hash_copy
	ret

/******************************************************************************/
/*
void ubi1024_nextBlock(ubi1024_ctx_t *ctx, const void *block){
	threefish1024_ctx_t tfctx;
	((uint64_t*)(ctx->tweak))[0] += UBI1024_BLOCKSIZE_B;
	threefish1024_init(ctx->g, ctx->tweak, &tfctx);
memcpy(ctx->g, block, UBI1024_BLOCKSIZE_B);
	threefish1024_enc(ctx->g, &tfctx);
	memxor(ctx->g, block, UBI1024_BLOCKSIZE_B);
	ctx->tweak[15] &= (uint8_t)~0x40;
}
*/
/*
 * param ctx:   r24:r25
 * param block: r22:r23
 *
 * Processes one full 128 byte block.
 * r2..r7 are saved with push_range and hold, across the calls:
 *   CTX   - ctx at first, ctx->g once threefish1024_init is set up
 *   BLOCK - the input block
 *   TFCTX - the threefish context placed in the 20*8 byte stack area
 * Note: the position is added with a carry chain over 12 tweak bytes
 * (1 + 11), whereas the C reference above adds into a uint64_t.
 */
UBI1024_TWEAK_B = 16   /* tweak bytes in front of ctx->g */
UBI1024_BLOCK_B = 128  /* bytes per block / in ctx->g */
UBI1024_POS_B   = 12   /* tweak bytes covered by the position add */

CTX0   = 2
CTX1   = 3
BLOCK0 = 4
BLOCK1 = 5
TFCTX0 = 6
TFCTX1 = 7

.global ubi1024_nextBlock
ubi1024_nextBlock:
	stack_alloc_large 20*8
	push_range 2, 7
	adiw r30, 1                    /* Z points to tfctx */
	movw TFCTX0, r30
	movw BLOCK0, r22
	movw CTX0, r24
	movw r26, r24                  /* X = &ctx->tweak[0] */

	/* add BLOCKSIZE_B (128) to tweak */
	ldi  r25, UBI1024_BLOCK_B
	ld   r24, X
	add  r24, r25
	st   X+, r24
	ldi  r25, UBI1024_POS_B-1
.Lubi1024_nextBlock_carry:
	ld   r24, X
	adc  r24, r1                   /* r1 contributes 0, only the carry is added */
	st   X+, r24
	dec  r25                       /* dec does not modify the carry flag */
	brne .Lubi1024_nextBlock_carry

	/* call threefish1024_init(ctx->g, ctx->tweak, &tfctx) */
	movw r22, CTX0                 /* 2nd argument: ctx->tweak */
	movw r24, CTX0
	adiw r24, UBI1024_TWEAK_B      /* 1st argument: ctx->g */
	movw CTX0, r24                 /* CTX points to ctx->g from here on */
	movw r20, TFCTX0               /* 3rd argument: &tfctx */
	rcall threefish1024_init

	/* copy block to ctx->g */
	movw r30, BLOCK0
	movw r26, CTX0
	ldi  r25, UBI1024_BLOCK_B
.Lubi1024_nextBlock_copy:
	ld   r24, Z+
	st   X+, r24
	dec  r25
	brne .Lubi1024_nextBlock_copy

	/* call threefish1024_enc(ctx->g, &tfctx) */
	movw r22, TFCTX0
	movw r24, CTX0
	rcall threefish1024_enc

	/* xor block into ctx->g */
	movw r30, CTX0
	movw r26, BLOCK0
	ldi  r25, UBI1024_BLOCK_B
.Lubi1024_nextBlock_xor:
	ld   r24, X+
	ld   r23, Z
	eor  r23, r24
	st   Z+, r23
	dec  r25
	brne .Lubi1024_nextBlock_xor

	/* clear 'first' bit in tweak:
	 * Z is at ctx->g + 128, tweak[15] sits one byte below ctx->g,
	 * so step back 128 + 1 = 63 + 63 + 3 bytes */
	sbiw r30, 63
	sbiw r30, 63
	sbiw r30, 1+2
	ld   r24, Z
	andi r24, ~0x40
	st   Z, r24
exit:                                  /* not referenced by the code shown in this file */
	pop_range 2, 7
	stack_free_large2 20*8
	ret

/******************************************************************************/
/*
void ubi1024_lastBlock(ubi1024_ctx_t *ctx, const void *block, uint16_t length_b){
	threefish1024_ctx_t tfctx;
	while(length_b>UBI1024_BLOCKSIZE){
		ubi1024_nextBlock(ctx, block);
		block = (uint8_t*)block + UBI1024_BLOCKSIZE_B;
		length_b -= UBI1024_BLOCKSIZE;
	}
	ctx->tweak[15] |= 0x80;
	((uint64_t*)(ctx->tweak))[0] += (length_b+7)/8;
	if(length_b & 0x07)
		ctx->tweak[14] |= 0x80;
	threefish1024_init(ctx->g, ctx->tweak, &tfctx);
	memset(ctx->g, 0, UBI1024_BLOCKSIZE_B);
	memcpy(ctx->g, block, (length_b+7)/8);
	if(length_b & 0x07)
		ctx->g[(length_b+7)/8-1] |= 0x80>>(length_b&7);
	threefish1024_enc(ctx->g, &tfctx);
	memxor(ctx->g, block, (length_b+7)/8);
	if(length_b & 0x07){
ctx->g[((length_b+7)/8)-1] ^= 0x80>>(length_b&7);
	}
}
*/
/*
 * param ctx:      r24:r25
 * param block:    r22:r23
 * param length_b: r20:r21
 *
 * Handles the final (possibly partial) block; length_b counts bits.
 * r8..r17 are saved with push_range and hold, across the calls:
 *   MASK_B - 0x80 >> (length_b & 7), or 0 if length_b is a multiple of 8
 *   LEN_B  - (length_b + 7) / 8, number of message bytes in the last block
 *   TFCTX  - the threefish context placed in the 20*8 byte stack area
 *   CTX    - ctx at first, ctx->g once threefish1024_init is set up
 *   BLOCK  - the current input block
 *   LENGTH - the remaining length in bits
 */
UBI1024_TWEAK_B = 16   /* tweak bytes in front of ctx->g */
UBI1024_BLOCK_B = 128  /* bytes per block / in ctx->g */
UBI1024_POS_B   = 12   /* tweak bytes covered by the position add */

MASK_B  =  8
LEN_B   =  9
TFCTX0  = 10
TFCTX1  = 11
CTX0    = 12
CTX1    = 13
BLOCK0  = 14
BLOCK1  = 15
LENGTH0 = 16
LENGTH1 = 17

.global ubi1024_lastBlock
ubi1024_lastBlock:
	/* run nextBlock for preceding blocks */
	push_range 8, 17
	movw LENGTH0, r20
	movw BLOCK0, r22
	movw CTX0, r24

	/* high byte >= 5: more than 1024 bits are left for sure */
.Lubi1024_lastBlock_bulk:
	cpi  LENGTH1, 5
	brlo .Lubi1024_lastBlock_tail
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi1024_nextBlock
	ldi  r25, UBI1024_BLOCK_B
	add  BLOCK0, r25               /* block += 128 bytes */
	adc  BLOCK1, r1
	subi LENGTH1, 4                /* length_b -= 1024 bits */
	rjmp .Lubi1024_lastBlock_bulk

	/* high byte == 4: more than 1024 bits only if the low byte is non-zero */
.Lubi1024_lastBlock_tail:
	cpi  LENGTH1, 4
	brlo .Lubi1024_lastBlock_final
	tst  LENGTH0
	breq .Lubi1024_lastBlock_final
	movw r24, CTX0
	movw r22, BLOCK0
	rcall ubi1024_nextBlock
	ldi  r25, UBI1024_BLOCK_B
	add  BLOCK0, r25
	adc  BLOCK1, r1
	subi LENGTH1, 4

.Lubi1024_lastBlock_final:
	/* now the real fun */
	stack_alloc_large 20*8
	adiw r30, 1                    /* Z is taken as the address of tfctx */
	movw TFCTX0, r30

	/* calculate LEN_B = (length_b + 7) / 8 */
	movw r24, LENGTH0
	adiw r24, 7
	.rept 3                        /* 16 bit shift right by three */
	lsr  r25
	ror  r24
	.endr
	mov  LEN_B, r24

	/* add length to tweak */
	movw r30, CTX0
	ld   r24, Z
	add  r24, LEN_B
	st   Z+, r24
	ldi  r25, UBI1024_POS_B-1
.Lubi1024_lastBlock_carry:
	ld   r24, Z
	adc  r24, r1                   /* r1 contributes 0, only the carry is added */
	st   Z+, r24
	dec  r25                       /* dec does not modify the carry flag */
	brne .Lubi1024_lastBlock_carry

	/* set 'final' bit */
	movw r30, CTX0
	ldd  r24, Z+15
	ori  r24, 0x80
	std  Z+15, r24

	/* store in MASK_B if we do bit processing and set 'BitPad' bit */
	clr  MASK_B
	mov  r24, LENGTH0
	andi r24, 0x07
	tst  r24
	breq .Lubi1024_lastBlock_init  /* whole bytes only, MASK_B stays 0 */
	ldd  r25, Z+14
	ori  r25, 0x80
	std  Z+14, r25
	ldi  r25, 0x80
	mov  MASK_B, r25
.Lubi1024_lastBlock_mask:
	lsr  MASK_B                    /* MASK_B = 0x80 >> (length_b & 7) */
	dec  r24
	brne .Lubi1024_lastBlock_mask

.Lubi1024_lastBlock_init:
	/* call threefish1024_init(ctx->g, ctx->tweak, &tfctx) */
	movw r22, CTX0                 /* 2nd argument: ctx->tweak */
	movw r24, CTX0
	adiw r24, UBI1024_TWEAK_B      /* 1st argument: ctx->g */
	movw CTX0, r24                 /* CTX points at ctx->g from here on */
	movw r20, TFCTX0               /* 3rd argument: &tfctx */
	rcall threefish1024_init

	/* copy block to ctx->g */
	movw r30, CTX0
	movw r26, BLOCK0
	mov  r24, LEN_B                /* r24 = bytes to copy */
	ldi  r25, UBI1024_BLOCK_B
	sub  r25, LEN_B                /* r25 = bytes to zero-fill afterwards */
	tst  r24
.Lubi1024_lastBlock_copy:
	breq .Lubi1024_lastBlock_copied
	ld   r22, X+
	st   Z+, r22
	dec  r24
	rjmp .Lubi1024_lastBlock_copy
.Lubi1024_lastBlock_copied:
	tst  MASK_B
	breq .Lubi1024_lastBlock_pad
	or   r22, MASK_B               /* r22 still holds the last copied byte */
	st   -Z, r22
	adiw r30, 1
.Lubi1024_lastBlock_pad:
	tst  r25
.Lubi1024_lastBlock_zero:
	breq .Lubi1024_lastBlock_enc
	st   Z+, r1
	dec  r25
	rjmp .Lubi1024_lastBlock_zero

.Lubi1024_lastBlock_enc:
	/* call threefish1024_enc(ctx->g, &tfctx) */
	movw r22, TFCTX0
	movw r24, CTX0
	rcall threefish1024_enc

	/* xor block into ctx->g */
	movw r26, BLOCK0
	movw r30, CTX0
	tst  LEN_B
.Lubi1024_lastBlock_xor:
	breq .Lubi1024_lastBlock_xored
	ld   r22, X+
	ld   r23, Z
	eor  r23, r22
	st   Z+, r23
	dec  LEN_B
	rjmp .Lubi1024_lastBlock_xor
.Lubi1024_lastBlock_xored:
	tst  MASK_B
	breq .Lubi1024_lastBlock_leave
	eor  r23, MASK_B               /* r23 still holds the last byte written */
	st   -Z, r23
.Lubi1024_lastBlock_leave:
	stack_free_large2 20*8
	pop_range 8, 17
	ret