1 /* bmw_small-tinyasm.S */
3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * File: bmw_small-tinyasm.S
24 * License: GPLv3 or later
25 * Description: implementation of BlueMidnightWish
29 #include "avr-asm-macros.S"
36 /******************************************************************************/
38 param a: r22:r23:r24:r25
52 /******************************************************************************/
54 param a: r22:r23:r24:r25
77 /******************************************************************************/
79 param a: r22:r23:r24:r25
109 /******************************************************************************/
137 param x: r22:r23:r24:r25
146 ldi r30, lo8(s_table)
147 ldi r31, hi8(s_table)
175 /******************************************************************************/
177 param dest: r26:r27 (X)
178 param src: r30:r31 (Z)
194 /******************************************************************************/
203 /******************************************************************************/
248 eor_acc_from_Y_add_to_Z:
253 /******************************************************************************/
261 .byte 0x03, 0x11, 5*4
262 .byte 0xDD, 0xB3, 7*4
263 .byte 0x2A, 0x79, 10*4
264 .byte 0x07, 0xAA, 13*4
265 .byte 0x51, 0xC2, 14*4
266 .byte 0 ; just for alignment
269 /*******************************************************************************
270 * uint32_t addelment(uint8_t j, const uint32_t* m, const uint32_t* h){
272 * r = pgm_read_dword(k_lut+j);
273 * r += rotl_addel(((uint32_t*)m)[j&0xf], j+0);
274 * r += rotl_addel(((uint32_t*)m)[(j+3)&0xf], j+3);
275 * r -= rotl_addel(((uint32_t*)m)[(j+10)&0xf], j+10);
276 * r ^= ((uint32_t*)h)[(j+7)&0xf];
339 /******************************************************************************/
365 rcall load_acc_from_X
372 rcall store_acc_to_dec_X
375 rcall load_rotate_add_M
377 rcall load_rotate_add_M
380 rcall load_rotate_add_M
411 /******************************************************************************/
420 .byte 0x2B, 0x64, 0x66, 0x03, 0x51, 0x55, 0x87, 0x55
422 .byte (2<<1), (7<<1), (4<<1), (3<<1), (4<<1)+1, (6<<1)+1, (6<<1)
425 .byte 3,7,13,16,19,23,27
426 ; .byte 0 ; just for alignment
431 ldi r30, lo8(expand2_rot_table)
432 ldi r31, hi8(expand2_rot_table)
439 12: rcall add32_to_acc
455 /******************************************************************************/
461 /* for calling expand1/2
468 /******************************************************************************/
475 /******************************************************************************/
511 bmw_small_nextBlock_early:
514 .global bmw_small_nextBlock
515 .global bmw224_nextBlock
516 .global bmw256_nextBlock
523 stack_alloc_large 32*4, r28, r29
532 ; push_range 28, 29 /* push Q */
533 ; push_range 22, 25 /* push M & H */
534 /* increment counter */
539 rcall load_acc_from_X
545 rcall store_acc_to_dec_X
565 /* calculate W and store it in Q */
569 /* load initial index */
571 /* load values from hacktable */
572 ldi r30, lo8(f0_hacktable-3)
573 ldi r31, hi8(f0_hacktable-3)
597 rcall load_acc_from_X
707 10: rcall load32_from_X
729 ;--- /* calc first half of h0..h15 */
742 15: ldi r30, lo8(f2_1_shift_table-9)
743 ldi r31, hi8(f2_1_shift_table-9)
758 25: rcall shiftleft32
759 26: rcall eor32_to_acc
769 27: rcall shiftright32
770 28: rcall eor32_to_acc
782 sbiw r26, 4*8 /* X points to q[24] */
785 sbiw r28, 33 /* Y points to q[0] */
787 sbiw r30, 1 /* Z points to h0 */
793 rcall eor_acc_from_Y_add_to_Z
796 sbiw r26, 9*4 /* X points to q[23] */
797 rcall load_acc_from_X
801 rcall eor_acc_from_Y_add_to_Z
803 sbiw r26, 8*4 /* X points to q[16] */
807 ldi r30, lo8(f2_2_shift_table-1)
808 ldi r31, hi8(f2_2_shift_table-1)
812 rcall load_acc_from_X
819 20: rcall shiftright32
823 rcall eor_acc_from_Y_add_to_Z
828 sbiw r30, 8*4 /* Z points to h8 */
830 sbiw r26, 4*4 /* X points to h4 */
852 stack_free_large3 32*4+4
864 /******************************************************************************/
883 /******************************************************************************/
890 .global bmw_small_lastBlock
891 .global bmw224_lastBlock
892 .global bmw256_lastBlock
896 /* while(length_b >= BMW_SMALL_BLOCKSIZE){
897 bmw_small_nextBlock(ctx, block);
898 length_b -= BMW_SMALL_BLOCKSIZE;
899 block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
910 rcall bmw_small_nextBlock_early
925 /* memset(pctx.buffer, 0, 64);
926 memcpy(pctx.buffer, block, (length_b+7)/8);
927 pctx.buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
937 /* copy (#r24) bytes to stack buffer */
942 301: /* calculate the appended byte */
962 /* if(length_b+1>64*8-64){ ; = 64*7-1 = 447 max(length_b)=511
963 bmw_small_nextBlock(ctx, pctx.buffer);
964 memset(pctx.buffer, 0, 64-8);
973 rcall bmw_small_nextBlock_early
980 rcall load32_from_Z_stub
986 /* *((uint64_t*)&(pctx.buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b;
987 bmw_small_nextBlock(ctx, pctx.buffer);
990 rcall load32_from_Z_stub
1010 rcall bmw_small_nextBlock_early
1011 /* memset(pctx.buffer, 0xaa, 64);
1013 pctx.buffer[i*4] = i+0xa0;
1026 /* bmw_small_nextBlock((bmw_small_ctx_t*)&pctx, ctx->h);
1027 memcpy(ctx->h, pctx.buffer, 64);
1031 rcall bmw_small_nextBlock
1045 /*******************************************************************************
1046 * void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx){
1047 * memcpy(dest, &(ctx->h[9]), 224/8);
1050 * param dest: r24:r25
1051 * param ctx: r22:r23
1053 .global bmw224_ctx2hash
1060 /*******************************************************************************
1061 * void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx){
1062 * memcpy(dest, &(ctx->h[8]), 256/8);
1065 * param dest: r24:r25
1066 * param ctx: r22:r23
1068 .global bmw256_ctx2hash
1080 /*******************************************************************************
1081 * void bmw256(void* dest, const void* msg, uint32_t length_b){
1082 * bmw_small_ctx_t ctx;
1083 * bmw256_init(&ctx);
1084 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1085 * bmw_small_nextBlock(&ctx, msg);
1086 * length_b -= BMW_SMALL_BLOCKSIZE;
1087 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1089 * bmw_small_lastBlock(&ctx, msg, length_b);
1090 * bmw256_ctx2hash(dest, &ctx);
1093 * param dest: r24:r25
1094 * param msg: r22:r23
1095 * param length_b: r18:r21
1113 /*******************************************************************************
1114 * void bmw224(void* dest, const void* msg, uint32_t length_b){
1115 * bmw_small_ctx_t ctx;
1116 * bmw224_init(&ctx);
1117 * while(length_b>=BMW_SMALL_BLOCKSIZE){
1118 * bmw_small_nextBlock(&ctx, msg);
1119 * length_b -= BMW_SMALL_BLOCKSIZE;
1120 * msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
1122 * bmw_small_lastBlock(&ctx, msg, length_b);
1123 * bmw224_ctx2hash(dest, &ctx);
1126 * param dest: r24:r25
1127 * param msg: r22:r23
1128 * param length_b: r18:r21
1151 stack_alloc_large 64+4
1162 ldi r30, pm_lo8(init_lut)
1163 ldi r31, pm_hi8(init_lut)
1171 rcall bmw_small_nextBlock_early
1183 rcall bmw_small_lastBlock
1186 ldi r30, pm_lo8(c2h_lut)
1187 ldi r31, pm_hi8(c2h_lut)
1191 stack_free_large 64+4
1199 rjmp bmw224_ctx2hash
1200 rjmp bmw256_ctx2hash
1202 /*******************************************************************************
1203 * void bmw224_init(bmw224_ctx_t* ctx){
1205 * ctx->h[0] = 0x00010203;
1206 * for(i=1; i<16; ++i){
1207 * ctx->h[i] = ctx->h[i-1]+ 0x04040404;
1212 * param ctx: r24:r25
1243 /******************************************************************************/
1252 ldi r24, lo8(qdbg_str)
1253 ldi r25, hi8(qdbg_str)
1256 10: ldi r24, lo8(qdbg_str1)
1257 ldi r25, hi8(qdbg_str1)
1260 call cli_hexdump_byte
1261 ldi r24, lo8(qdbg_str2)
1262 ldi r25, hi8(qdbg_str2)
1267 call cli_hexdump_rev
1275 qdbg_str: .asciz "\r\nDBG Q: " /* heading text of the Q debug dump; its address is loaded into r24:r25 via lo8()/hi8() by the debug code in this file (how it is emitted is not visible in this excerpt) */
1276 qdbg_str1: .asciz "\r\n Q[" /* per-entry prefix; loaded into r24:r25 at local label 10, ahead of the cli_hexdump_byte call */
1277 qdbg_str2: .asciz "] = " /* per-entry separator; loaded into r24:r25 after the cli_hexdump_byte call and ahead of the cli_hexdump_rev call */
1288 ldi r24, lo8(Xdbg_str)
1289 ldi r25, hi8(Xdbg_str)
1296 10: ldi r24, lo8(Xdbg_str1)
1297 ldi r25, hi8(Xdbg_str1)
1304 call cli_hexdump_byte
1305 ldi r24, lo8(Xdbg_str2)
1306 ldi r25, hi8(Xdbg_str2)
1311 call cli_hexdump_rev
1321 Xdbg_str: .asciz "\r\nDBG " /* heading text shared by several debug dumps in this file: loaded into r24:r25 via lo8()/hi8() ahead of the Xdbg_str1 loop and ahead of the two runs of four cli_hexdump_byte calls */
1322 Xdbg_str1: .asciz "\r\n " /* per-entry prefix; loaded into r24:r25 at local label 10, ahead of the cli_hexdump_byte call */
1323 Xdbg_str2: .asciz "] = " /* per-entry separator; loaded into r24:r25 after the cli_hexdump_byte call and ahead of the cli_hexdump_rev call */
1331 ldi r24, lo8(Xdbg_str)
1332 ldi r25, hi8(Xdbg_str)
1335 call cli_hexdump_byte
1337 call cli_hexdump_byte
1339 call cli_hexdump_byte
1341 call cli_hexdump_byte
1351 ldi r24, lo8(Xdbg_str)
1352 ldi r25, hi8(Xdbg_str)
1355 call cli_hexdump_byte
1357 call cli_hexdump_byte
1359 call cli_hexdump_byte
1361 call cli_hexdump_byte