From 0ce5b31e9772c15cd1514e371a59f04e3a888111 Mon Sep 17 00:00:00 2001 From: bg Date: Tue, 5 Jul 2011 19:34:03 +0000 Subject: [PATCH] some bigint stuff in ASM --- avr-asm-macros.S | 3 + bigint/bigint.c | 3 +- bigint/bigint_add_u.S | 3 +- bigint/bigint_adjust.S | 110 ++++++++++++ bigint/bigint_asm.S | 334 ++++++++++++++++++++++++++++++++++++ doc/acl_blockciphers.texi | 3 +- host/bigint_test.rb | 55 +++++- mkfiles/bigint.mk | 2 +- test_src/main-bigint-test.c | 6 +- 9 files changed, 501 insertions(+), 18 deletions(-) create mode 100644 bigint/bigint_adjust.S create mode 100644 bigint/bigint_asm.S diff --git a/avr-asm-macros.S b/avr-asm-macros.S index 63f9303..766cdae 100644 --- a/avr-asm-macros.S +++ b/avr-asm-macros.S @@ -26,6 +26,8 @@ * */ +//#ifndef AVR_ASM_MACROS__S__ +//#define AVR_ASM_MACROS__S__ #include /******************************************************************************* @@ -144,4 +146,5 @@ *******************************************************************************/ +//#endif /* AVR_ASM_MACROS__S__ */ diff --git a/bigint/bigint.c b/bigint/bigint.c index 0ff6338..3e2f0eb 100644 --- a/bigint/bigint.c +++ b/bigint/bigint.c @@ -77,9 +77,9 @@ void bigint_adjust(bigint_t* a){ /******************************************************************************/ void bigint_copy(bigint_t* dest, const bigint_t* src){ - memcpy(dest->wordv, src->wordv, src->length_B); dest->length_B = src->length_B; dest->info = src->info; + memcpy(dest->wordv, src->wordv, src->length_B); } /******************************************************************************/ @@ -611,7 +611,6 @@ void bigint_reduce(bigint_t* a, const bigint_t* r){ while(bigint_cmp_u(a,r)>=0){ bigint_sub_u(a,a,r); } - bigint_adjust(a); } /******************************************************************************/ diff --git a/bigint/bigint_add_u.S b/bigint/bigint_add_u.S index 7c34f1a..5c75899 100644 --- a/bigint/bigint_add_u.S +++ b/bigint/bigint_add_u.S @@ -26,7 +26,6 @@ * */ -#include 
"avr-asm-macros.S" /* param dest: r24:r25 @@ -132,6 +131,6 @@ bigint_add_u: 9: pop_range 24, 25 pop_range 28, 29 - jmp bigint_adjust + rjmp bigint_adjust diff --git a/bigint/bigint_adjust.S b/bigint/bigint_adjust.S new file mode 100644 index 0000000..56ff2a1 --- /dev/null +++ b/bigint/bigint_adjust.S @@ -0,0 +1,110 @@ +/* bigint_adjust.S */ +/* + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/
+
+/*
+void bigint_adjust(bigint_t* a){
+	while(a->length_B!=0 && a->wordv[a->length_B-1]==0){
+		a->length_B--;
+	}
+	if(a->length_B==0){
+		a->info=0;
+		return;
+	}
+	uint8_t t;
+	uint8_t i = 0x07;
+	t = a->wordv[a->length_B-1];
+	while((t&0x80)==0 && i){
+		t<<=1;
+		i--;
+	}
+	SET_FBS(a, i);
+}
+*/
+.global bigint_adjust
+bigint_adjust:	/* in: r25:r24 = a; layout used: length_B @0..1, info @2, wordv @3..4 */
+	movw r30, r24	/* Z = a */
+	ldd r24, Z+0	/* r25:r24 = a->length_B */
+	ldd r25, Z+1
+	ldd r26, Z+3	/* X = a->wordv */
+	ldd r27, Z+4
+	add r26, r24	/* X = a->wordv + a->length_B (one past the top byte) */
+	adc r27, r25
+20:	/* strip zero bytes from the most significant end */
+	sbiw r24, 1
+	brmi 30f	/* ran out of bytes: the value is zero */
+	ld r23, -X
+	tst r23
+	brne 40f	/* r23 = most significant non-zero byte */
+	rjmp 20b
+30:	/* value is zero: length_B = 0 and info = 0 (r1 is assumed to hold 0, avr-gcc convention) */
+	std Z+0, r1
+	std Z+1, r1	/* clear the high byte of length_B as well, not Z+0 twice */
+	std Z+2, r1
+	ret
+40:
+	adiw r24, 1	/* undo the last decrement: r25:r24 = trimmed length */
+	std Z+0, r24
+	std Z+1, r25
+	clr r24	/* r24 = index of the bit currently in bit 0 of r23 */
+50:	/* r25 ends up as the index of the highest set bit of the top byte */
+	sbrc r23, 0
+	mov r25, r24
+	lsr r23
+	inc r24
+	brpl 50b	/* keeps looping until r24 reaches 0x80 (N flag of inc) */
+	ldd r23, Z+2
+	andi r23, 0xF8	/* replace the low three bits of info with that index */
+	or r23, r25
+	std Z+2, r23
+bigint_adjust_ret:
+	ret
+
+/*
+void bigint_copy(bigint_t* dest, const bigint_t* src){
+	memcpy(dest->wordv, src->wordv, src->length_B);
+	dest->length_B = src->length_B;
+	dest->info = src->info;
+}
+*/
+.global bigint_copy
+bigint_copy:	/* in: r25:r24 = dest, r23:r22 = src */
+	movw r26, r24	/* X = dest */
+	movw r30, r22	/* Z = src */
+	/* copy length */
+	ld r24, Z+
+	st X+, r24
+	ld r25, Z+
+	st X+, r25
+	/* copy info */
+	ld r22, Z+
+	st X+, r22
+	/* load wordv pointers */
+	ld r22, Z+
+	ld r23, Z
+	movw r30, r22	/* Z = src->wordv */
+	ld r22, X+
+	ld r23, X
+	movw r26, r22	/* X = dest->wordv, read from dest: it must already point at the target buffer */
+10:	/* byte-wise copy, r25:r24 = bytes remaining */
+	sbiw r24, 1
+	brmi bigint_adjust_ret	/* done: shares the ret of bigint_adjust */
+	ld r22, Z+
+	st X+, r22
+	rjmp 10b
+
diff --git a/bigint/bigint_asm.S b/bigint/bigint_asm.S
new file mode 100644
index 0000000..a91ddad
--- /dev/null
+++ b/bigint/bigint_asm.S
@@ -0,0 +1,334 @@
+/* bigint_asm.S */
+/*
+    This file is part of the ARM-Crypto-Lib.
+    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "avr-asm-macros.S"
+#include "bigint_adjust.S"
+#include "bigint_add_u.S"
+
+
+/******************************************************************************/
+/* C reference for the routine below: dest += a << (8*scale)
+void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){
+	uint16_t i,j=0;
+	uint16_t t=0;
+	if(scale>dest->length_B)
+		memset(dest->wordv+dest->length_B, 0, scale-dest->length_B);
+	for(i=scale; i<a->length_B+scale; ++i,++j){
+		t = a->wordv[j] + t;
+		if(dest->length_B>i){
+			t += dest->wordv[i];
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+	}
+	while(t){
+		if(dest->length_B>i){
+			t = dest->wordv[i] + t;
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+		++i;
+	}
+	if(dest->length_B < i){
+		dest->length_B = i;
+	}
+	bigint_adjust(dest);
+}
+*/
+
+DST_SIZE_0 = 22	/* register numbers used only by the disabled draft below */
+DST_SIZE_1 = 23
+SRC_SIZE_0 = 20
+SRC_SIZE_1 = 23	/* NOTE(review): same register as DST_SIZE_1; the later SRC_LEN_1 uses 21 - confirm before re-enabling the draft */
+SCALE_0 = 18
+SCALE_1 = 19
+DST_CTX_0 = 6
+DST_CTX_1 = 7
+SRC_CTX_0 = 8
+SRC_CTX_1 = 9
+TMP_0 = 10
+TMP_1 = 11
+
+.global bigint_add_scale_u
+#if 0 /* disabled draft: excluded by the preprocessor */
+bigint_add_scale_u:
+	push_range 6, 11
+	movw r30, r24 /* dest ptr */
+	movw r26, r22 /* src ptr */
+	movw r24, r20 /* scale */
+	movw DST_CTX_0, r30
+	movw SRC_CTX_0, r26
+	movw SCALE_0, r24
+	/* pad dst with zeros if scale > dst_length */
+	ld DST_SIZE_0, Z+
+	ld DST_SIZE_1, Z+
+	sub r24, DST_SIZE_0
+	sbc r25, DST_SIZE_1
+	ldd TMP_0, Z+1 /* load tmp with DST_WORDV */
+	ldd TMP_1, Z+2
+	movw r30, TMP_0
+	brmi 20f /* branch if DST_SIZE > scale */
+	add r30, DST_SIZE_0
+	adc r31, DST_SIZE_1
+10:
+	sbiw r24, 1
+	brmi 25f
+	st Z+, r1
+	rjmp 10b
+20:
+	add r30, r20 /* add scale to DST_WORDV */
+	adc r31, r21
+	/* add src to dest until one of the two ends */
+25:
+	ld SRC_SIZE_0, X+
+	ld SRC_SIZE_1, X+
+	adiw r26, 1
+	ld TMP_0, X+ /* load tmp with SRC_WORDV */
+	ld TMP_1, X
+	movw r26, TMP_0
+	movw r24, SRC_SIZE_0
+	add r24, SCALE_0
+	adc r25, SCALE_1
+	clt
+	cp r24, DST_SIZE_0
+	cpc r25, DST_SIZE_1
+	brlo 30f
+	set
+	movw r24, DST_SIZE_0
+30:
+	adiw r24, 0
+	breq 35f
+	inc r25
+	clc
+31:
+	ld TMP_0, X+
+	ld TMP_1, Z
+	adc TMP_1, TMP_0
+	st Z+, TMP_1
+	dec r24
+	brne 31b
+	dec r25
+	brne 31b
+35:
+	rol TMP_1
+	brts 40f
+	/* dst is longer than src+scale */
+	ror TMP_1
+38:
+	ld TMP_0, Z
+	adc TMP_0, r1
+	st Z+, TMP_0
+	brcs 38b
+	rjmp 90f
+40:
+	/* dst is shorter than src+scale */
+	movw r24, SRC_SIZE_0
+	sub r24, DST_SIZE_0
+	sbc r25, DST_SIZE_1
+	add r24, SCALE_0
+	adc r25, SCALE_1
+	adiw r24, 0
+	breq 90f
+	inc r25
+	ror TMP_1
+45:
+	ld TMP_0, X+
+	adc TMP_0, r1
+	st Z+, TMP_0
+	dec r24
+	brne 45b
+	dec r25
+	brne 45b
+
+90:
+	movw r24, DST_CTX_0
+	pop_range 6, 11
+	rjmp bigint_adjust
+
+#endif
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+
+DST_LEN_0 = 22	/* register numbers used by the live routine below */
+DST_LEN_1 = 23
+SRC_LEN_0 = 20
+SRC_LEN_1 = 21
+SCALE_0 = 18
+SCALE_1 = 19
+DST_CTX_0 = 6
+DST_CTX_1 = 7
+SRC_CTX_0 = 8
+SRC_CTX_1 = 9
+TMP_0 = 10
+TMP_1 = 11
+
+bigint_add_scale_u:	/* in: r25:r24 = dest, r23:r22 = src, r21:r20 = scale */
+	push_range 6, 11	/* presumably saves r6..r11; macro comes from avr-asm-macros.S */
+	movw r30, r24 /* dest ptr */
+	movw r26, r22 /* src ptr */
+	movw r24, r20 /* scale */
+	movw DST_CTX_0, r30
+	movw SRC_CTX_0, r26
+	movw SCALE_0, r24
+
+	/* pad dest with zeros to length of SRC_LENGTH + scale */
+	ld SRC_LEN_0, X+	/* SRC_LEN = src->length_B */
+	ld SRC_LEN_1, X+
+	adiw r26, 1	/* skip the info byte at offset 2 */
+	ld TMP_0, X+
+	ld TMP_1, X+
+	movw r26, TMP_0 /* move SRC_WORDV to X */
+	ldd DST_LEN_0, Z+0	/* DST_LEN = dest->length_B */
+	ldd DST_LEN_1, Z+1
+	ldd TMP_0, Z+3
+	ldd TMP_1, Z+4
+	movw r30, TMP_0 /* move DEST_WORDV to Z */
+	movw TMP_0, SCALE_0
+	sub TMP_0, DST_LEN_0	/* TMP = scale - DST_LEN */
+	sbc TMP_1, DST_LEN_1
+	movw r24, TMP_0	/* movw leaves the flags of the subtraction untouched */
+	brmi 40f /* no padding needed since DST_LEN > scale */
+	add r30, DST_LEN_0 /* add DST_LEN to Z (DEST_WORDV)*/
+	adc r31, DST_LEN_1
+	/* pad and copy src in front of dest */
+10: /* padding loop */
+	sbiw r24, 1
+	brmi 11f
+	st Z+, r1
+	rjmp 10b
+11:
+	/* start of copy */
+
+	movw r24, SRC_LEN_0
+
+12: /* copy loop */
+	sbiw r24, 1
+	brmi 13f
+	ld TMP_0, X+
+	st Z+, TMP_0
+	rjmp 12b
+13:	/* dest->length_B = scale + SRC_LEN */
+	movw TMP_0, SCALE_0
+	add TMP_0, SRC_LEN_0
+	adc TMP_1, SRC_LEN_1
+	movw r30, DST_CTX_0
+	std Z+0, TMP_0
+	std Z+1, TMP_1
+	movw r24, r30	/* r25:r24 = dest, the argument for bigint_adjust */
+99:	/* common exit: restore registers and tail-jump into bigint_adjust */
+	pop_range 6, 11
+	rjmp bigint_adjust
+40:
+	/* TODO */
+	/* Z points at DST_WORDV */
+	/* X points at SRC_WORDV */
+	/* r24:r25 contains scale - DST_LEN (negative) */
+	/* set T bit if DST_LEN > SRC_LEN + scale */
+	clt
+	add r30, SCALE_0	/* Z = DST_WORDV + scale */
+	adc r31, SCALE_1
+	add TMP_0, SRC_LEN_0	/* TMP = scale + SRC_LEN - DST_LEN */
+	adc TMP_1, SRC_LEN_1
+	brpl 41f
+	set
+	/* DST_LEN > SRC_LEN + scale && DST_LEN > scale */
+	/*
+	+-------+-------+ SRC + scale
+	+------+------------+ DST
+	*/
+	movw r24, SRC_LEN_0	/* all SRC_LEN bytes overlap dest */
+	rjmp 44f
+41:
+	/* DST_LEN <= SRC_LEN + scale && DST_LEN > scale */
+	/*
+	+-------+-------+ SRC + scale
+	+------------+ DST
+	*/
+	com r24 /* negate r24:r25 */
+	com r25
+	adiw r24, 1	/* r25:r24 = DST_LEN - scale overlapping bytes */
+44:
+	clc
+45:	/* add loop; NOTE(review): dec/brpl only looks at bit 7 of r24, so counts above 128 appear to end early - verify */
+	dec r24
+	brpl 46f
+	dec r25
+	brmi 50f
+46:	ld TMP_0, X+
+	ld TMP_1, Z
+	adc TMP_0, TMP_1
+	st Z+, TMP_0
+	rjmp 45b
+
+50:
+	/* do the overhanging part */
+	rol r1	/* park the carry in bit 0 of r1 (r1 assumed 0 here, avr-gcc convention) */
+	movw r24, r30	/* keep the current write position */
+	movw r30, DST_CTX_0
+	ldd TMP_0, Z+3
+	ldd TMP_1, Z+4
+	movw r30, TMP_0
+	add r30, DST_LEN_0
+	adc r31, DST_LEN_1
+	adiw r30, 1
+	st Z, r1	/* writes the parked carry to DEST_WORDV[DST_LEN + 1]; NOTE(review): offset and value look unintended - confirm */
+	movw r30, r24
+	ror r1	/* carry restored, r1 back to 0 */
+	brtc 60f	/* T clear: src + scale reaches up to or past the end of dest */
+51:	brcc 53f
+52:	ld TMP_0, Z	/* ripple the carry through the following dest bytes */
+	adc TMP_0, r1
+	st Z+, TMP_0
+	brcs 52b
+53:	/* length_B = Z - DEST_WORDV; NOTE(review): on the T-set path without carry Z stops below DST_LEN, so length_B seems to shrink, unlike the C reference - verify */
+	/* TODO */
+	movw r24, r30
+	movw r30, DST_CTX_0
+	ldd TMP_0, Z+3
+	ldd TMP_1, Z+4
+	sub r24, TMP_0
+	sbc r25, TMP_1
+	std Z+0, r24
+	std Z+1, r25
+	movw r24, r30
+	rjmp 99b
+
+60:	rol r1 /* backup carry */
+	movw r24, SRC_LEN_0	/* r25:r24 = SRC_LEN + scale - DST_LEN bytes still to copy */
+	add r24, SCALE_0
+	adc r25, SCALE_1
+	sub r24, DST_LEN_0
+	sbc r25, DST_LEN_1
+	ror r1 /* restore carry */
+
+61:	dec r24	/* same dec/brpl counter as at 45: NOTE(review) counts above 128 appear to end early */
+	brpl 62f
+	dec r25
+	brmi 63f
+62:	ld TMP_0, X+
+	adc TMP_0, r1
+	st Z+, TMP_0
+	rjmp 61b
+63:
+	brcc 53b
+	ldi r24, 1	/* a final carry becomes a new top byte of value 1 */
+	st Z+, r24
+	rjmp 53b
diff --git a/doc/acl_blockciphers.texi b/doc/acl_blockciphers.texi index 93b6a8d..d256019 100644 --- a/doc/acl_blockciphers.texi +++ b/doc/acl_blockciphers.texi @@ -1,8 +1,7 @@ @c acl_blockcipher.texi @section Block ciphers -@subsection What a block cipher does - A block cipher is a algorithm which turn an input of fixed length into an + A block cipher is a algorithm which turns an input of fixed length into an output of the same length (enciphering or encrypting). The transformation is specified by a key which has to be of a fixed length, or a length of a given set or range. diff --git a/host/bigint_test.rb b/host/bigint_test.rb index 641f287..6a1930a 100644 --- a/host/bigint_test.rb +++ b/host/bigint_test.rb @@ -254,17 +254,18 @@ def add_scale_test(a, b, scale) begin line = $sp.gets() line = "" if line==nil - puts("DBG got: "+line) if $debug + puts("DBG got (#{__LINE__}): "+line) if $debug if /^Error:.*/.match(line) puts line return false end end while not /[\s]*enter a:[\s]*/.match(line) + puts("DBG put (#{__LINE__}): "+a.to_s(16)+" ") if $debug $sp.print(a.to_s(16)+" ") begin line = $sp.gets() line = "" if line==nil - puts("DBG got: "+line) if $debug + puts("DBG got (#{__LINE__}): "+line) if $debug if /^Error:.*/.match(line) puts line return false @@ -274,17 +275,17 @@ def add_scale_test(a, b, scale) begin line = $sp.gets() line = "" if line==nil - puts("DBG got: "+line) if $debug + puts("DBG got (#{__LINE__}): "+line) if $debug if /^Error:.*/.match(line) puts line return false end end while not /[\s]*enter scale:[\s]*/.match(line) - $sp.print(scale.to_s(16)+"\n") + $sp.print(scale.to_s(10)+"\r") begin line = $sp.gets() line = "" if line==nil - puts("DBG got: "+line) if $debug + puts("DBG got (#{__LINE__}): "+line) if $debug if /^Error:.*/.match(line) puts line return false @@ -295,12 +296,13 @@ def add_scale_test(a, b, scale) s_ = m[3].to_i(16) c_ = m[4].to_i(16) 
line.chomp!
-  if(a_== a && b_ == b && c_ == (a+b))
+  should = a + (b<<(8*scale))  # expected result: a plus b shifted left by scale bytes
+  if(a_== a && b_ == b && s_ == scale && c_ == should )
     $logfile.printf("[pass]: %s\n", line)
     return true
   else
-    $logfile.printf("[fail (%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (c_==a+b)?"":"c",line)
-    $logfile.printf(" ; should %s + %s = %s\n", a.to_s(16), b.to_s(16), (a+b).to_s(16))
+    $logfile.printf("[fail (%s%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (scale==s_)?"":"s",(c_==should)?"":"c",line)  # one %s per mismatch flag (a, b, s, c), then the echoed line
+    $logfile.printf(" ; should %s + %s << 8*%s = %s\n", a.to_s(16), b.to_s(16), scale.to_s(16), should.to_s(16))
     return false
   end
   return false
@@ -552,6 +554,41 @@ def run_test_add(skip=0)
   end while length_a_B<4096/8
 end
 
+################################################################################
+# run_test_add_scale #
+################################################################################
+
+def run_test_add_scale(skip=0)  # operand size starts at skip+1 bytes and grows by one per round
+  length_a_B = skip+1
+  length_b_B = skip+1
+  begin
+    $size = length_a_B
+    (0..16).each do |i|  # pass 1: both operands drawn below 256**length_a_B
+      (0..300).each do |scale|
+        a = rand(256**length_a_B)
+        b = rand(256**length_a_B)
+        v = add_scale_test(a, b, scale)
+        screen_progress(v)
+        v = add_scale_test(b, a, scale)  # same pair with the operands swapped
+        screen_progress(v)
+      end
+    end
+    (0..16).each do |i|  # pass 2: b drawn from a random size of 0..length_b_B bytes
+      (0..300).each do |scale|
+        b_size = rand(length_b_B+1)
+        a = rand(256**length_a_B)
+        b = rand(256**b_size)
+        v = add_scale_test(a, b, scale)
+        screen_progress(v)
+        v = add_scale_test(b, a, scale)
+        screen_progress(v)
+      end
+    end
+    length_a_B += 1
+    length_b_B += 1
+  end while length_a_B<4096/8
+end
+
 ################################################################################
 # run_test_mul #
 ################################################################################
@@ -762,12 +799,14 @@ $logfile.printf("seed = 0x%X\n", 0xdeadbeef)
 tests = Hash.new
 tests['a'] = proc {|x| run_test_add(x) }
 tests['m'] = proc {|x| run_test_mul(x) }
+tests['x'] = proc {|x| run_test_add_scale(x) }
 tests['s'] = proc {|x| run_test_square(x) }
tests['r'] = proc {|x| run_test_reduce(x) } tests['e'] = proc {|x| run_test_expmod(x) } tests['g'] = proc {|x| run_test_gcdext(x) } init_str = Hash.new init_str['a'] = 'add-test' +init_str['x'] = 'add-scale-test' init_str['m'] = 'mul-test' init_str['s'] = 'square-test' init_str['r'] = 'reduce-test' diff --git a/mkfiles/bigint.mk b/mkfiles/bigint.mk index ede274b..076e77d 100644 --- a/mkfiles/bigint.mk +++ b/mkfiles/bigint.mk @@ -2,7 +2,7 @@ ALGO_NAME := BIGINT # comment out the following line for removement of BigInt from the build process -#AUX += $(ALGO_NAME) +AUX += $(ALGO_NAME) $(ALGO_NAME)_DIR := bigint/ $(ALGO_NAME)_INCDIR := memxor/ noekeon/ diff --git a/test_src/main-bigint-test.c b/test_src/main-bigint-test.c index 608c91a..4daf319 100644 --- a/test_src/main-bigint-test.c +++ b/test_src/main-bigint-test.c @@ -104,12 +104,12 @@ void test_add_scale_bigint(void){ for(;;){ cli_putstr_P(PSTR("\r\nenter a:")); if(bigint_read_hex_echo(&a)){ - cli_putstr_P(PSTR("\r\n end add test")); + cli_putstr_P(PSTR("\r\n end add-scale test")); return; } cli_putstr_P(PSTR("\r\nenter b:")); if(bigint_read_hex_echo(&b)){ - cli_putstr_P(PSTR("\r\n end add test")); + cli_putstr_P(PSTR("\r\n end add-scale test")); return; } cli_putstr_P(PSTR("\r\nenter scale:")); @@ -140,8 +140,8 @@ void test_add_scale_bigint(void){ free(b.wordv); continue; } - bigint_copy(&c, &a); c.wordv = c_b; + bigint_copy(&c, &a); bigint_add_scale_u(&c, &b, scale); bigint_print_hex(&c); cli_putstr_P(PSTR("\r\n")); -- 2.39.5