1 /* bmw_small-tinyasm.S */
3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * File: bmw_small-tinyasm.S
24 * License: GPLv3 or later
25 * Description: implementation of BlueMidnightWish
29 #include "avr-asm-macros.S"
38 /******************************************************************************/
40 param a: r22:r23:r24:r25
54 /******************************************************************************/
56 param a: r22:r23:r24:r25
79 /******************************************************************************/
81 param a: r22:r23:r24:r25
111 /******************************************************************************/
139 param x: r22:r23:r24:r25
148 ldi r30, lo8(s_table)
149 ldi r31, hi8(s_table)
177 /******************************************************************************/
179 param dest: r26:r27 (X)
180 param src: r30:r31 (Z)
197 /******************************************************************************/
206 /******************************************************************************/
251 /******************************************************************************/
259 ; .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
260 .byte 0x55, 0x87, 0x55, 0x51, 0x03, 0x66, 0x64, 0x2B
262 ; .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
263 .byte (8<<1)+1, (6<<1), (6<<1)+1, (4<<1)+1, (3<<1), (4<<1), (7<<1), (2<<1)
265 .byte 3,7,13,16,19,23,27
268 .byte 0x03, 0x11, 5*4
269 .byte 0xDD, 0xB3, 7*4
270 .byte 0x2A, 0x79, 10*4
271 .byte 0x07, 0xAA, 13*4
272 .byte 0x51, 0xC2, 14*4
275 /*******************************************************************************
276 * uint32_t addelment(uint8_t j, const uint32_t* m, const uint32_t* h){
278 * r = pgm_read_dword(k_lut+j);
279 * r += rotl_addel(((uint32_t*)m)[j&0xf], j+0);
280 * r += rotl_addel(((uint32_t*)m)[(j+3)&0xf], j+3);
281 * r -= rotl_addel(((uint32_t*)m)[(j+10)&0xf], j+10);
282 * r ^= ((uint32_t*)h)[(j+7)&0xf];
345 /******************************************************************************/
371 rcall load_acc_from_X
378 rcall store_acc_to_dec_X
381 rcall load_rotate_add_M
383 rcall load_rotate_add_M
386 rcall load_rotate_add_M
417 /******************************************************************************/
429 ldi r30, lo8(expand2_rot_table)
430 ldi r31, hi8(expand2_rot_table)
437 12: rcall add32_to_acc
453 /******************************************************************************/
459 /* for calling expand1/2
466 /******************************************************************************/
473 /******************************************************************************/
509 bmw_small_nextBlock_early:
512 .global bmw_small_nextBlock
513 .global bmw224_nextBlock
514 .global bmw256_nextBlock
521 stack_alloc_large 32*4, r28, r29
530 ; push_range 28, 29 /* push Q */
531 ; push_range 22, 25 /* push M & H */
532 /* increment counter */
537 rcall load_acc_from_X
543 rcall store_acc_to_dec_X
563 /* calculate W and store it in Q */
567 /* load initial index */
569 /* load values from hacktable */
570 ldi r30, lo8(f0_hacktable-3)
571 ldi r31, hi8(f0_hacktable-3)
595 rcall load_acc_from_X
706 rcall load_acc_from_X
708 10: rcall load32_from_X
729 /* copy m(Y) into h */
737 ;--- /* calc first half of h0..h15 */
740 ldi r30, lo8(f2_1_shift_table)
741 ldi r31, hi8(f2_1_shift_table)
763 25: rcall shiftleft32
764 26: rcall mov32_to_acc
774 27: rcall shiftright32
775 28: rcall eor32_to_acc
793 sbiw r28, 4*8 /* Y points to q[24] */
796 sbiw r28, 33 /* Y points to q[0] */
799 /* xor q[24..31] into q[0..7] */
801 /* xor q[23] into q[8] */
805 /* xor q[16..22] into q[9..15] */
812 ldi r30, lo8(f2_2_shift_table)
813 ldi r31, hi8(f2_2_shift_table)
833 sbiw r26, 8*4 /* X points to h8 */
835 sbiw r28, 4*4 /* Y points to h4 */
858 stack_free_large3 32*4+4
870 /******************************************************************************/
889 /******************************************************************************/
896 .global bmw_small_lastBlock
897 .global bmw224_lastBlock
898 .global bmw256_lastBlock
902 /* while(length_b >= BMW_SMALL_BLOCKSIZE){
903 bmw_small_nextBlock(ctx, block);
904 length_b -= BMW_SMALL_BLOCKSIZE;
905 block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
916 rcall bmw_small_nextBlock_early
931 /* memset(pctx.buffer, 0, 64);
932 memcpy(pctx.buffer, block, (length_b+7)/8);
933 pctx.buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
943 /* copy (#r24) bytes to stack buffer */
948 301: /* calculate the appended byte */
968 /* if(length_b+1>64*8-64){ ; = 64*7-1 = 447 max(length_b)=511
969 bmw_small_nextBlock(ctx, pctx.buffer);
970 memset(pctx.buffer, 0, 64-8);
979 rcall bmw_small_nextBlock_early
986 rcall load32_from_Z_stub
992 /* *((uint64_t*)&(pctx.buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b;
993 bmw_small_nextBlock(ctx, pctx.buffer);
996 rcall load32_from_Z_stub
1016 rcall bmw_small_nextBlock_early
1017 /* memset(pctx.buffer, 0xaa, 64);
1019 pctx.buffer[i*4] = i+0xa0;
1032 /* bmw_small_nextBlock((bmw_small_ctx_t*)&pctx, ctx->h);
1033 memcpy(ctx->h, pctx.buffer, 64);
1037 rcall bmw_small_nextBlock
1051 /*******************************************************************************
1052 * void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx){
1053 * memcpy(dest, &(ctx->h[9]), 224/8);
1056 * param dest: r24:r25
1057 * param ctx: r22:r23
1059 .global bmw224_ctx2hash
1066 /*******************************************************************************
1067 * void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx){
1068 * memcpy(dest, &(ctx->h[8]), 256/8);
1071 * param dest: r24:r25
1072 * param ctx: r22:r23
1074 .global bmw256_ctx2hash
1086 /*******************************************************************************
1087 * void bmw256(void* dest, const void* msg, uint32_t length_b){
1088 * bmw_small_ctx_t ctx;
1089 * bmw256_init(&ctx);
1090 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1091 * bmw_small_nextBlock(&ctx, msg);
1092 * length_b -= BMW_SMALL_BLOCKSIZE;
1093 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1095 * bmw_small_lastBlock(&ctx, msg, length_b);
1096 * bmw256_ctx2hash(dest, &ctx);
1099 * param dest: r24:r25
1100 * param msg: r22:r23
1101 * param length_b: r18:r21
1119 /*******************************************************************************
1120 * void bmw224(void* dest, const void* msg, uint32_t length_b){
1121 * bmw_small_ctx_t ctx;
1122 * bmw224_init(&ctx);
1123 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1124 * bmw_small_nextBlock(&ctx, msg);
1125 * length_b -= BMW_SMALL_BLOCKSIZE;
1126 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1128 * bmw_small_lastBlock(&ctx, msg, length_b);
1129 * bmw224_ctx2hash(dest, &ctx);
1132 * param dest: r24:r25
1133 * param msg: r22:r23
1134 * param length_b: r18:r21
1157 stack_alloc_large 64+4
1168 ldi r30, pm_lo8(init_lut)
1169 ldi r31, pm_hi8(init_lut)
1177 rcall bmw_small_nextBlock_early
1189 rcall bmw_small_lastBlock
1192 ldi r30, pm_lo8(c2h_lut)
1193 ldi r31, pm_hi8(c2h_lut)
1197 stack_free_large 64+4
1205 rjmp bmw224_ctx2hash
1206 rjmp bmw256_ctx2hash
1208 /*******************************************************************************
1209 * void bmw224_init(bmw224_ctx_t* ctx){
1211 * ctx->h[0] = 0x00010203;
1212 * for(i=1; i<16; ++i){
1213 * ctx->h[i] = ctx->h[i-1]+ 0x04040404;
1218 * param ctx: r24:r25
1249 /******************************************************************************/
1258 ldi r24, lo8(qdbg_str)
1259 ldi r25, hi8(qdbg_str)
1262 10: ldi r24, lo8(qdbg_str1)
1263 ldi r25, hi8(qdbg_str1)
1266 call cli_hexdump_byte
1267 ldi r24, lo8(qdbg_str2)
1268 ldi r25, hi8(qdbg_str2)
1273 call cli_hexdump_rev
1281 qdbg_str: .asciz "\r\nDBG Q: "
1282 qdbg_str1: .asciz "\r\n Q["
1283 qdbg_str2: .asciz "] = "
1294 ldi r24, lo8(Xdbg_str)
1295 ldi r25, hi8(Xdbg_str)
1302 10: ldi r24, lo8(Xdbg_str1)
1303 ldi r25, hi8(Xdbg_str1)
1310 call cli_hexdump_byte
1311 ldi r24, lo8(Xdbg_str2)
1312 ldi r25, hi8(Xdbg_str2)
1317 call cli_hexdump_rev
1327 Xdbg_str: .asciz "\r\nDBG "
1328 Xdbg_str1: .asciz "\r\n "
1329 Xdbg_str2: .asciz "] = "
1337 ldi r24, lo8(Xdbg_str)
1338 ldi r25, hi8(Xdbg_str)
1341 call cli_hexdump_byte
1343 call cli_hexdump_byte
1345 call cli_hexdump_byte
1347 call cli_hexdump_byte
1357 ldi r24, lo8(Xdbg_str)
1358 ldi r25, hi8(Xdbg_str)
1361 call cli_hexdump_byte
1363 call cli_hexdump_byte
1365 call cli_hexdump_byte
1367 call cli_hexdump_byte