1 /* bmw_small-tinyasm.S */
3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * File: bmw_small-tinyasm.S
24 * License: GPLv3 or later
25 * Description: implementation of BlueMidnightWish
29 #include "avr-asm-macros.S"
38 /******************************************************************************/
40 param a: r22:r23:r24:r25
59 /******************************************************************************/
61 param a: r22:r23:r24:r25
84 /******************************************************************************/
86 param a: r22:r23:r24:r25
116 /******************************************************************************/
144 param x: r22:r23:r24:r25
153 ldi r30, lo8(s_table)
154 ldi r31, hi8(s_table)
182 /******************************************************************************/
184 param dest: r26:r27 (X)
185 param src: r30:r31 (Z)
202 /******************************************************************************/
211 /******************************************************************************/
256 /******************************************************************************/
264 ; .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
265 ; .byte 0x55, 0x87, 0x55, 0x51, 0x03, 0x66, 0x64, 0x2B
266 .byte 5, -5, -7, 8, -5, 5, -1, 5, -3, 0, 6, -6, -4, 6, -11, 2
268 ; .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
269 .byte 8, -6, 6, 4, -3, -4, -7, -2
271 .byte 3,7,13,16,19,23,27
274 .byte 0x03, 0x11, 5*4
275 .byte 0xDD, 0xB3, 7*4
276 .byte 0x2A, 0x79, 10*4
277 .byte 0x07, 0xAA, 13*4
278 .byte 0x51, 0xC2, 14*4
281 /*******************************************************************************
282 * uint32_t addelment(uint8_t j, const uint32_t* m, const uint32_t* h){
284 * r = pgm_read_dword(k_lut+j);
285 * r += rotl_addel(((uint32_t*)m)[j&0xf], j+0);
286 * r += rotl_addel(((uint32_t*)m)[(j+3)&0xf], j+3);
287 * r -= rotl_addel(((uint32_t*)m)[(j+10)&0xf], j+10);
288 * r ^= ((uint32_t*)h)[(j+7)&0xf];
351 /******************************************************************************/
377 rcall load_acc_from_X
384 rcall store_acc_to_dec_X
387 rcall load_rotate_add_M
389 rcall load_rotate_add_M
392 rcall load_rotate_add_M
423 /******************************************************************************/
435 ldi r30, lo8(expand2_rot_table)
436 ldi r31, hi8(expand2_rot_table)
443 12: rcall add32_to_acc
459 /******************************************************************************/
465 /* for calling expand1/2
472 /******************************************************************************/
479 /******************************************************************************/
515 bmw_small_nextBlock_early:
518 .global bmw_small_nextBlock
519 .global bmw256_nextBlock
526 stack_alloc_large 32*4, r28, r29
535 ; push_range 28, 29 /* push Q */
536 ; push_range 22, 25 /* push M & H */
537 /* increment counter */
542 rcall load_acc_from_X
548 rcall store_acc_to_dec_X
568 /* calculate W and store it in Q */
572 /* load initial index */
574 /* load values from hacktable */
575 ldi r30, lo8(f0_hacktable-3)
576 ldi r31, hi8(f0_hacktable-3)
600 rcall load_acc_from_X
711 rcall load_acc_from_X
713 10: rcall load32_from_X
734 /* copy m(Y) into h */
742 ;--- /* calc first half of h0..h15 */
745 ldi r30, lo8(f2_1_shift_table)
746 ldi r31, hi8(f2_1_shift_table)
765 rcall store_acc_to_dec_X
771 sbiw r28, 4*8 /* Y points to q[24] */
774 sbiw r28, 33 /* Y points to q[0] */
777 /* xor q[24..31] into q[0..7] */
779 /* xor q[23] into q[8] */
783 /* xor q[16..22] into q[9..15] */
790 ldi r30, lo8(f2_2_shift_table-8)
791 ldi r31, hi8(f2_2_shift_table-8)
804 sbiw r26, 8*4 /* X points to h8 */
806 sbiw r28, 4*4 /* Y points to h4 */
829 stack_free_large3 32*4+4
841 /******************************************************************************/
860 /******************************************************************************/
867 .global bmw_small_lastBlock
868 .global bmw256_lastBlock
872 /* while(length_b >= BMW_SMALL_BLOCKSIZE){
873 bmw_small_nextBlock(ctx, block);
874 length_b -= BMW_SMALL_BLOCKSIZE;
875 block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
886 rcall bmw_small_nextBlock_early
901 /* memset(pctx.buffer, 0, 64);
902 memcpy(pctx.buffer, block, (length_b+7)/8);
903 pctx.buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
913 /* copy (#r24) bytes to stack buffer */
918 301: /* calculate the appended byte */
938 /* if(length_b+1>64*8-64){ ; = 64*7-1 = 447 max(length_b)=511
939 bmw_small_nextBlock(ctx, pctx.buffer);
940 memset(pctx.buffer, 0, 64-8);
949 rcall bmw_small_nextBlock_early
956 rcall load32_from_Z_stub
962 /* *((uint64_t*)&(pctx.buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b;
963 bmw_small_nextBlock(ctx, pctx.buffer);
966 rcall load32_from_Z_stub
986 rcall bmw_small_nextBlock_early
987 /* memset(pctx.buffer, 0xaa, 64);
989 pctx.buffer[i*4] = i+0xa0;
1002 /* bmw_small_nextBlock((bmw_small_ctx_t*)&pctx, ctx->h);
1003 memcpy(ctx->h, pctx.buffer, 64);
1007 rcall bmw_small_nextBlock
1021 /*******************************************************************************
1022 * void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx){
1023 * memcpy(dest, &(ctx->h[8]), 256/8);
1026 * param dest: r24:r25
1027 * param ctx: r22:r23
1029 .global bmw256_ctx2hash
1041 /*******************************************************************************
1042 * void bmw256(void* dest, const void* msg, uint32_t length_b){
1043 * bmw_small_ctx_t ctx;
1044 * bmw256_init(&ctx);
1045 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1046 * bmw_small_nextBlock(&ctx, msg);
1047 * length_b -= BMW_SMALL_BLOCKSIZE;
1048 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1050 * bmw_small_lastBlock(&ctx, msg, length_b);
1051 * bmw256_ctx2hash(dest, &ctx);
1054 * param dest: r24:r25
1055 * param msg: r22:r23
1056 * param length_b: r18:r21
1070 /*******************************************************************************
1071 * void bmw224(void* dest, const void* msg, uint32_t length_b){
1072 * bmw_small_ctx_t ctx;
1073 * bmw224_init(&ctx);
1074 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1075 * bmw_small_nextBlock(&ctx, msg);
1076 * length_b -= BMW_SMALL_BLOCKSIZE;
1077 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1079 * bmw_small_lastBlock(&ctx, msg, length_b);
1080 * bmw224_ctx2hash(dest, &ctx);
1083 * param dest: r24:r25
1084 * param msg: r22:r23
1085 * param length_b: r18:r21
1104 stack_alloc_large 64+4
1117 rcall bmw_small_nextBlock_early
1129 rcall bmw_small_lastBlock
1132 rcall bmw256_ctx2hash
1133 stack_free_large 64+4
1136 /******************************************************************************/
1159 /******************************************************************************/
1168 ldi r24, lo8(qdbg_str)
1169 ldi r25, hi8(qdbg_str)
1172 10: ldi r24, lo8(qdbg_str1)
1173 ldi r25, hi8(qdbg_str1)
1176 call cli_hexdump_byte
1177 ldi r24, lo8(qdbg_str2)
1178 ldi r25, hi8(qdbg_str2)
1183 call cli_hexdump_rev
/*
 * Zero-terminated (.asciz) text fragments for the "Q" debug dump.
 * Each label's address is loaded into r24:r25 with lo8()/hi8() by the
 * debug code in this file.
 * NOTE(review): the routine that consumes these pointers is presumably a
 * string-print helper; confirm against the call sites.
 *   qdbg_str  - heading line of the dump
 *   qdbg_str1 - text emitted before an index value
 *   qdbg_str2 - text emitted between the index and the dumped value
 */
1191 qdbg_str: .asciz "\r\nDBG Q: "
1192 qdbg_str1: .asciz "\r\n Q["
1193 qdbg_str2: .asciz "] = "
1204 ldi r24, lo8(Xdbg_str)
1205 ldi r25, hi8(Xdbg_str)
1212 10: ldi r24, lo8(Xdbg_str1)
1213 ldi r25, hi8(Xdbg_str1)
1220 call cli_hexdump_byte
1221 ldi r24, lo8(Xdbg_str2)
1222 ldi r25, hi8(Xdbg_str2)
1227 call cli_hexdump_rev
/*
 * Zero-terminated (.asciz) text fragments for the generic debug dumps.
 * Each label's address is loaded into r24:r25 with lo8()/hi8() by the
 * debug code in this file; Xdbg_str is referenced from more than one place.
 * NOTE(review): the routine that consumes these pointers is presumably a
 * string-print helper; confirm against the call sites.
 *   Xdbg_str  - heading text
 *   Xdbg_str1 - line break plus indentation emitted before an entry
 *   Xdbg_str2 - text emitted between the index and the dumped value
 */
1237 Xdbg_str: .asciz "\r\nDBG "
1238 Xdbg_str1: .asciz "\r\n "
1239 Xdbg_str2: .asciz "] = "
1247 ldi r24, lo8(Xdbg_str)
1248 ldi r25, hi8(Xdbg_str)
1251 call cli_hexdump_byte
1253 call cli_hexdump_byte
1255 call cli_hexdump_byte
1257 call cli_hexdump_byte
1267 ldi r24, lo8(Xdbg_str)
1268 ldi r25, hi8(Xdbg_str)
1271 call cli_hexdump_byte
1273 call cli_hexdump_byte
1275 call cli_hexdump_byte
1277 call cli_hexdump_byte