From 0ce5b31e9772c15cd1514e371a59f04e3a888111 Mon Sep 17 00:00:00 2001
From: bg <bg@b1d182e4-1ff8-0310-901f-bddb46175740>
Date: Tue, 5 Jul 2011 19:34:03 +0000
Subject: [PATCH] some bigint stuff in ASM

---
 avr-asm-macros.S            |   3 +
 bigint/bigint.c             |   3 +-
 bigint/bigint_add_u.S       |   3 +-
 bigint/bigint_adjust.S      | 110 ++++++++++++
 bigint/bigint_asm.S         | 334 ++++++++++++++++++++++++++++++++++++
 doc/acl_blockciphers.texi   |   3 +-
 host/bigint_test.rb         |  55 +++++-
 mkfiles/bigint.mk           |   2 +-
 test_src/main-bigint-test.c |   6 +-
 9 files changed, 501 insertions(+), 18 deletions(-)
 create mode 100644 bigint/bigint_adjust.S
 create mode 100644 bigint/bigint_asm.S

diff --git a/avr-asm-macros.S b/avr-asm-macros.S
index 63f9303..766cdae 100644
--- a/avr-asm-macros.S
+++ b/avr-asm-macros.S
@@ -26,6 +26,8 @@
  *
  */
 
+//#ifndef AVR_ASM_MACROS__S__
+//#define AVR_ASM_MACROS__S__
 #include <avr/io.h>
 
 /*******************************************************************************
@@ -144,4 +146,5 @@
 *******************************************************************************/
 
 
+//#endif /* AVR_ASM_MACROS__S__ */
 
diff --git a/bigint/bigint.c b/bigint/bigint.c
index 0ff6338..3e2f0eb 100644
--- a/bigint/bigint.c
+++ b/bigint/bigint.c
@@ -77,9 +77,9 @@ void bigint_adjust(bigint_t* a){
 /******************************************************************************/
 
 void bigint_copy(bigint_t* dest, const bigint_t* src){
-	memcpy(dest->wordv, src->wordv, src->length_B);
 	dest->length_B = src->length_B;
 	dest->info = src->info;
+	memcpy(dest->wordv, src->wordv, src->length_B);
 }
 
 /******************************************************************************/
@@ -611,7 +611,6 @@ void bigint_reduce(bigint_t* a, const bigint_t* r){
 	while(bigint_cmp_u(a,r)>=0){
 		bigint_sub_u(a,a,r);
 	}
-	bigint_adjust(a);
 }
 
 /******************************************************************************/
diff --git a/bigint/bigint_add_u.S b/bigint/bigint_add_u.S
index 7c34f1a..5c75899 100644
--- a/bigint/bigint_add_u.S
+++ b/bigint/bigint_add_u.S
@@ -26,7 +26,6 @@
  *
  */
 
-#include "avr-asm-macros.S"
 
 /*
  param dest: r24:r25
@@ -132,6 +131,6 @@ bigint_add_u:
 9:
 	pop_range 24, 25
 	pop_range 28, 29
-	jmp bigint_adjust
+	rjmp bigint_adjust
 
 
diff --git a/bigint/bigint_adjust.S b/bigint/bigint_adjust.S
new file mode 100644
index 0000000..56ff2a1
--- /dev/null
+++ b/bigint/bigint_adjust.S
@@ -0,0 +1,110 @@
+/* bigint_adjust.S */
+/*
+    This file is part of the ARM-Crypto-Lib.
+    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+void bigint_adjust(bigint_t* a){
+	while(a->length_B!=0 && a->wordv[a->length_B-1]==0){
+		a->length_B--;
+	}
+	if(a->length_B==0){
+		a->info=0;
+		return;
+	}
+	uint8_t t;
+	uint8_t i = 0x07;
+	t = a->wordv[a->length_B-1];
+	while((t&0x80)==0 && i){
+		t<<=1;
+		i--;
+	}
+	SET_FBS(a, i);
+}
+*/
+.global bigint_adjust	/* void bigint_adjust(bigint_t* a): drop leading zero bytes, refresh the first-bit-set field */
+bigint_adjust:
+	movw r30, r24		/* Z = a */
+	ldd r24, Z+0		/* r25:r24 = a->length_B */
+	ldd r25, Z+1
+	ldd r26, Z+3		/* X = a->wordv */
+	ldd r27, Z+4
+	add r26, r24		/* X = one past the most significant byte */
+	adc r27, r25
+20:	/* walk downwards until a non-zero byte shows up */
+	sbiw r24, 1
+	brmi 30f		/* no bytes left: the number is zero */
+	ld r23, -X
+	tst r23
+	brne 40f
+	rjmp 20b
+30:	/* zero: length_B = 0 (both bytes) and info = 0 */
+	std Z+0, r1
+	std Z+1, r1
+	std Z+2, r1
+	ret
+40:	/* r23 = most significant non-zero byte, r25:r24 = its index */
+	adiw r24, 1
+	std Z+0, r24		/* length_B = index + 1 */
+	std Z+1, r25
+	clr r24			/* r24 = number of the bit currently in bit 0 of r23 */
+50:
+	sbrc r23, 0
+	mov r25, r24		/* highest set bit seen so far */
+	inc r24
+	lsr r23			/* Z flag is set once no set bit is left */
+	brne 50b		/* r23 was non-zero on entry, so at most 8 rounds */
+	ldd r23, Z+2
+	andi r23, 0xF8		/* keep the upper info bits, replace the low three */
+	or r23, r25
+	std Z+2, r23
+bigint_adjust_ret:
+	ret
+
+/*
+void bigint_copy(bigint_t* dest, const bigint_t* src){
+	memcpy(dest->wordv, src->wordv, src->length_B);
+	dest->length_B = src->length_B;
+	dest->info = src->info;
+}
+*/
+.global bigint_copy
+bigint_copy:
+	movw r30, r22		/* Z = src */
+	movw r26, r24		/* X = dest */
+	/* length_B; it stays in r25:r24 as the byte counter */
+	ld r24, Z+
+	st X+, r24
+	ld r25, Z+
+	st X+, r25
+	/* info */
+	ld r18, Z+
+	st X+, r18
+	/* trade the struct pointers for the wordv pointers */
+	ld r18, Z+
+	ld r19, Z
+	movw r30, r18		/* Z = src->wordv */
+	ld r18, X+
+	ld r19, X
+	movw r26, r18		/* X = dest->wordv */
+	rjmp 20f		/* test the counter before the first byte */
+10:	ld r18, Z+
+	st X+, r18
+20:	sbiw r24, 1
+	brpl 10b		/* bytes left to copy */
+	ret
+
diff --git a/bigint/bigint_asm.S b/bigint/bigint_asm.S
new file mode 100644
index 0000000..a91ddad
--- /dev/null
+++ b/bigint/bigint_asm.S
@@ -0,0 +1,334 @@
+/* bigint_asm.S */
+/*
+    This file is part of the ARM-Crypto-Lib.
+    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "avr-asm-macros.S"
+#include "bigint_adjust.S"
+#include "bigint_add_u.S"
+
+
+/******************************************************************************/
+/*
+void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){
+	uint16_t i,j=0;
+	uint16_t t=0;
+	if(scale>dest->length_B)
+		memset(dest->wordv+dest->length_B, 0, scale-dest->length_B);
+	for(i=scale; i<a->length_B+scale; ++i,++j){
+		t = a->wordv[j] + t;
+		if(dest->length_B>i){
+			t += dest->wordv[i];
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+	}
+	while(t){
+		if(dest->length_B>i){
+			t = dest->wordv[i] + t;
+		}
+		dest->wordv[i] = (uint8_t)t;
+		t>>=8;
+		++i;
+	}
+	if(dest->length_B < i){
+		dest->length_B = i;
+	}
+	bigint_adjust(dest);
+}
+*/
+
+DST_SIZE_0 = 22 /* r23:r22 = length of dest */
+DST_SIZE_1 = 23
+SRC_SIZE_0 = 20 /* r21:r20 = length of src */
+SRC_SIZE_1 = 21 /* must pair with SRC_SIZE_0 for movw; 23 would alias DST_SIZE_1 */
+SCALE_0    = 18 /* r19:r18 = scale */
+SCALE_1    = 19
+DST_CTX_0  =  6 /* r7:r6 = dest pointer */
+DST_CTX_1  =  7
+SRC_CTX_0  =  8 /* r9:r8 = src pointer */
+SRC_CTX_1  =  9
+TMP_0      = 10 /* r11:r10 = scratch */
+TMP_1      = 11
+
+.global bigint_add_scale_u
+#if 0 /* disabled draft; the bigint_add_scale_u that gets assembled follows further down */
+bigint_add_scale_u:
+	push_range 6, 11
+	movw r30, r24 /* dest ptr */
+	movw r26, r22 /* src ptr */
+	movw r24, r20 /* scale */
+	movw DST_CTX_0, r30
+	movw SRC_CTX_0, r26
+	movw SCALE_0, r24
+	/* pad dst with zeros if scale > dst_length */
+	ld DST_SIZE_0, Z+
+	ld DST_SIZE_1, Z+
+	sub r24, DST_SIZE_0 /* r25:r24 = scale - DST_SIZE */
+	sbc r25, DST_SIZE_1
+	ldd TMP_0, Z+1 /* load tmp with DST_WORDV */
+	ldd TMP_1, Z+2
+	movw r30, TMP_0
+	brmi 20f /* branch if DST_SIZE > scale */
+	add r30, DST_SIZE_0 /* Z = first byte behind the current dest value */
+	adc r31, DST_SIZE_1
+10: /* store scale - DST_SIZE zero bytes */
+	sbiw r24, 1
+	brmi 25f
+	st Z+, r1
+	rjmp 10b
+20:
+	add r30, r20 /* add scale to DST_WORDV */
+	adc r31, r21
+	/* add src to dest until one of the two ends */
+25:
+	ld SRC_SIZE_0, X+
+	ld SRC_SIZE_1, X+
+	adiw r26, 1 /* step over the byte between the length and the wordv pointer */
+	ld TMP_0, X+ /* load tmp with SRC_WORDV */
+	ld TMP_1, X
+	movw r26, TMP_0
+	movw r24, SRC_SIZE_0
+	add r24, SCALE_0 /* r25:r24 = SRC_SIZE + scale */
+	adc r25, SCALE_1
+	clt
+	cp  r24, DST_SIZE_0
+	cpc r25, DST_SIZE_1
+	brlo 30f /* T stays clear when SRC_SIZE + scale < DST_SIZE */
+	set
+	movw r24, DST_SIZE_0
+30:
+	adiw r24, 0 /* only tests r25:r24 for zero */
+	breq 35f
+	inc r25 /* NOTE(review): unconditional; with a zero low count byte the loop below looks like it runs 256 rounds too many - confirm before enabling */
+	clc
+31: /* dec leaves the carry alone, so the adc chain is not disturbed by the counter */
+	ld TMP_0, X+
+	ld TMP_1, Z
+	adc TMP_1, TMP_0
+	st Z+, TMP_1
+	dec r24
+	brne 31b
+	dec r25
+	brne 31b
+35:
+	rol TMP_1 /* park the carry in bit 0 of TMP_1 */
+	brts 40f
+	/* dst is longer than src+scale */
+	ror TMP_1 /* bring the carry back */
+38:
+	ld TMP_0, Z
+	adc TMP_0, r1
+	st Z+, TMP_0
+	brcs 38b /* continue while the addition still carries */
+	rjmp 90f
+40:
+	/* dst is shorter than src+scale */
+	movw r24, SRC_SIZE_0
+	sub r24, DST_SIZE_0
+	sbc r25, DST_SIZE_1
+	add r24, SCALE_0 /* r25:r24 = SRC_SIZE - DST_SIZE + scale */
+	adc r25, SCALE_1
+	adiw r24, 0
+	breq 90f /* NOTE(review): the carry parked in TMP_1 appears to be dropped on this path - confirm */
+	inc r25
+	ror TMP_1 /* bring the carry back */
+45:
+	ld TMP_0, X+
+	adc TMP_0, r1
+	st Z+, TMP_0
+	dec r24
+	brne 45b
+	dec r25
+	brne 45b
+
+90: /* NOTE(review): no store to the length field of dest is visible in this draft - confirm */
+	movw r24, DST_CTX_0 /* dest pointer becomes the argument of bigint_adjust */
+	pop_range 6, 11
+	rjmp bigint_adjust
+
+#endif
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+
+DST_LEN_0 = 22 /* r23:r22 = dest->length_B */
+DST_LEN_1 = 23
+SRC_LEN_0 = 20 /* r21:r20 = a->length_B, later the byte count of the second phase */
+SRC_LEN_1 = 21
+SCALE_0   = 18 /* r19:r18 = scale */
+SCALE_1   = 19
+DST_CTX_0 =  6 /* r7:r6 = dest */
+DST_CTX_1 =  7
+SRC_CTX_0 =  8 /* r9:r8 = a (saved, not read again) */
+SRC_CTX_1 =  9
+TMP_0     = 10 /* r11:r10 = scratch */
+TMP_1     = 11
+
+/*
+ * void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale)
+ * dest += a << (8 * scale); both numbers are unsigned byte vectors stored
+ * least significant byte first.
+ *
+ * in:       r25:r24 = dest, r23:r22 = a, r21:r20 = scale
+ * layout:   +0/+1 length_B, +2 info, +3/+4 wordv pointer
+ * clobbers: r18-r27, r30, r31, T flag (r6-r11 are saved and restored)
+ * requires: r1 == 0 and room in dest->wordv for the result including a
+ *           possible carry byte
+ *
+ * The add loops count with dec/brne because dec leaves the carry flag alone,
+ * so the adc carry chain survives the loop bookkeeping. To make that work for
+ * 16 bit counters the high byte is bumped once if the low byte is non-zero.
+ * Control finally passes to bigint_adjust, which normalises dest.
+ */
+bigint_add_scale_u:
+	push_range 6, 11
+	movw r30, r24 /* dest ptr */
+	movw r26, r22 /* src ptr */
+	movw DST_CTX_0, r24
+	movw SRC_CTX_0, r22
+	movw SCALE_0, r20 /* before SRC_LEN overwrites r21:r20 */
+
+	ld SRC_LEN_0, X+
+	ld SRC_LEN_1, X+
+	adiw r26, 1 /* skip a->info */
+	ld TMP_0, X+
+	ld TMP_1, X
+	movw r26, TMP_0 /* move SRC_WORDV to X */
+	ldd DST_LEN_0, Z+0
+	ldd DST_LEN_1, Z+1
+	ldd TMP_0, Z+3
+	ldd TMP_1, Z+4
+	movw r30, TMP_0 /* move DEST_WORDV to Z */
+	movw r24, SCALE_0
+	sub r24, DST_LEN_0
+	sbc r25, DST_LEN_1 /* r25:r24 = scale - DST_LEN */
+	brlo 40f /* DST_LEN > scale (unsigned): the operands overlap */
+
+	/* scale >= DST_LEN: pad dest with zeros, then copy src behind them */
+	add r30, DST_LEN_0 /* add DST_LEN to Z (DEST_WORDV) */
+	adc r31, DST_LEN_1
+10: /* padding loop, runs scale - DST_LEN times */
+	sbiw r24, 1
+	brcs 11f
+	st Z+, r1
+	rjmp 10b
+11:
+	movw r24, SRC_LEN_0
+12: /* copy loop */
+	sbiw r24, 1
+	brcs 13f
+	ld TMP_0, X+
+	st Z+, TMP_0
+	rjmp 12b
+13:
+	rjmp 70f /* Z is one past the top byte: store the new length */
+
+40:
+	/* DST_LEN > scale; Z points at DST_WORDV, X points at SRC_WORDV */
+	add r30, SCALE_0 /* Z = &DST_WORDV[scale] */
+	adc r31, SCALE_1
+	movw r24, DST_LEN_0
+	sub r24, SCALE_0
+	sbc r25, SCALE_1 /* r25:r24 = DST_LEN - scale (> 0) */
+	/* set T bit if DST_LEN > SRC_LEN + scale */
+	clt
+	cp  SRC_LEN_0, r24
+	cpc SRC_LEN_1, r25
+	brsh 41f
+	set
+	/* DST_LEN > SRC_LEN + scale && DST_LEN > scale */
+	/*
+	       +-------+-------+ SRC + scale
+	   +------+------------+ DST
+	*/
+	sub r24, SRC_LEN_0
+	sbc r25, SRC_LEN_1 /* bytes of dest above the end of src */
+	movw TMP_0, r24
+	movw r24, SRC_LEN_0 /* phase 1: add every byte of src */
+	movw SRC_LEN_0, TMP_0 /* phase 2: ripple the carry through the rest of dest */
+	rjmp 44f
+41:
+	/* DST_LEN <= SRC_LEN + scale && DST_LEN > scale */
+	/*
+	       +-------+-------+ SRC + scale
+	          +------------+ DST
+	*/
+	/* phase 1: add DST_LEN - scale bytes (already in r25:r24) */
+	sub SRC_LEN_0, r24
+	sbc SRC_LEN_1, r25 /* phase 2: bytes of src beyond the end of dest */
+44:
+	clc
+	mov TMP_0, r24
+	or  TMP_0, r25 /* mov/or/cpse/inc/dec leave the carry flag alone */
+	breq 50f /* src is empty, nothing to add */
+	cpse r24, r1
+	inc r25 /* low byte != 0: the partial first round needs one more dec */
+45:
+	ld TMP_0, X+
+	ld TMP_1, Z
+	adc TMP_1, TMP_0
+	st Z+, TMP_1
+	dec r24
+	brne 45b
+	dec r25
+	brne 45b
+50:
+	/* do the overhanging part, SRC_LEN holds its byte count */
+	mov TMP_0, SRC_LEN_0
+	or  TMP_0, SRC_LEN_1
+	breq 63f /* src ends exactly at the top of dest */
+	cpse SRC_LEN_0, r1
+	inc SRC_LEN_1
+	brtc 61f
+52: /* dest is longer: propagate the carry */
+	brcc 90f /* carry absorbed, DST_LEN stays as it is */
+	ld TMP_0, Z
+	adc TMP_0, r1
+	st Z+, TMP_0
+	dec SRC_LEN_0
+	brne 52b
+	dec SRC_LEN_1
+	brne 52b
+	rjmp 63f /* carry left the old top byte of dest */
+61: /* src is longer: copy its remaining bytes plus carry */
+	ld TMP_0, X+
+	adc TMP_0, r1
+	st Z+, TMP_0
+	dec SRC_LEN_0
+	brne 61b
+	dec SRC_LEN_1
+	brne 61b
+63:
+	brcc 70f
+	ldi r24, 1 /* a final carry becomes the new top byte */
+	st Z+, r24
+70: /* DST_LEN = Z - DST_WORDV */
+	movw r24, r30
+	movw r30, DST_CTX_0
+	ldd TMP_0, Z+3
+	ldd TMP_1, Z+4
+	sub r24, TMP_0
+	sbc r25, TMP_1
+	std Z+0, r24
+	std Z+1, r25
+90:
+	movw r24, DST_CTX_0 /* argument for bigint_adjust */
+99:
+	pop_range 6, 11
+	rjmp bigint_adjust
diff --git a/doc/acl_blockciphers.texi b/doc/acl_blockciphers.texi
index 93b6a8d..d256019 100644
--- a/doc/acl_blockciphers.texi
+++ b/doc/acl_blockciphers.texi
@@ -1,8 +1,7 @@
 @c acl_blockcipher.texi
 
 @section Block ciphers
-@subsection What a block cipher does
- A block cipher is a algorithm which turn an input of fixed length into an 
+ A block cipher is an algorithm which turns an input of fixed length into an 
  output of the same length (enciphering or encrypting). The transformation is 
  specified by a key which has to be of a fixed length, or a length of a given 
  set or range.
diff --git a/host/bigint_test.rb b/host/bigint_test.rb
index 641f287..6a1930a 100644
--- a/host/bigint_test.rb
+++ b/host/bigint_test.rb
@@ -254,17 +254,18 @@ def add_scale_test(a, b, scale)
   begin
     line = $sp.gets()
     line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
     if /^Error:.*/.match(line)
       puts line
       return false
     end
   end while not /[\s]*enter a:[\s]*/.match(line)
+  puts("DBG put (#{__LINE__}): "+a.to_s(16)+" ") if $debug
   $sp.print(a.to_s(16)+" ")
   begin
     line = $sp.gets()
     line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
     if /^Error:.*/.match(line)
       puts line
       return false
@@ -274,17 +275,17 @@ def add_scale_test(a, b, scale)
   begin
     line = $sp.gets()
     line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
     if /^Error:.*/.match(line)
       puts line
       return false
     end
   end while not /[\s]*enter scale:[\s]*/.match(line)
-  $sp.print(scale.to_s(16)+"\n")
+  $sp.print(scale.to_s(10)+"\r")
   begin
     line = $sp.gets()
     line = "" if line==nil
-    puts("DBG got: "+line) if $debug
+    puts("DBG got (#{__LINE__}): "+line) if $debug
     if /^Error:.*/.match(line)
       puts line
       return false
@@ -295,12 +296,13 @@ def add_scale_test(a, b, scale)
   s_ = m[3].to_i(16)
   c_ = m[4].to_i(16)
   line.chomp!
-  if(a_== a && b_ == b && c_ == (a+b))
+  should = a + (b<<(8*scale))
+  if(a_== a && b_ == b && s_ == scale && c_ == should )
     $logfile.printf("[pass]: %s\n", line)
     return true
   else
-    $logfile.printf("[fail (%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (c_==a+b)?"":"c",line)
-    $logfile.printf(" ; should %s + %s = %s\n", a.to_s(16), b.to_s(16), (a+b).to_s(16))
+    $logfile.printf("[fail (%s%s%s%s)]: %s", (a==a_)?"":"a", (b==b_)?"":"b", (scale==s_)?"":"s",(c_==should)?"":"c",line) # one %s per flag (a, b, s, c) plus one for the line
+    $logfile.printf(" ; should %s + %s << 8*%s = %s\n", a.to_s(16), b.to_s(16), scale.to_s(16), should.to_s(16))
     return false
   end
   return false
@@ -552,6 +554,41 @@ def run_test_add(skip=0)
   end while length_a_B<4096/8
 end
 
+################################################################################
+# run_test_add_scale                                                           #
+################################################################################
+
+def run_test_add_scale(skip=0)
+  length_a_B = skip+1
+  length_b_B = skip+1
+  begin
+    $size = length_a_B
+    (0..16).each do |i|
+      (0..300).each do |scale|
+        a = rand(256**length_a_B)
+        b = rand(256**length_a_B)
+        v = add_scale_test(a, b, scale)
+        screen_progress(v)
+        v = add_scale_test(b, a, scale)
+        screen_progress(v)
+      end
+    end
+    (0..16).each do |i|
+      (0..300).each do |scale|
+        b_size = rand(length_b_B+1)
+        a = rand(256**length_a_B)
+        b = rand(256**b_size)
+        v = add_scale_test(a, b, scale)
+        screen_progress(v)      
+        v = add_scale_test(b, a, scale)
+        screen_progress(v)
+      end
+    end
+    length_a_B += 1
+    length_b_B += 1
+  end while length_a_B<4096/8
+end
+
 ################################################################################
 # run_test_mul                                                                 #
 ################################################################################
@@ -762,12 +799,14 @@ $logfile.printf("seed = 0x%X\n", 0xdeadbeef)
 tests = Hash.new
 tests['a'] = proc {|x| run_test_add(x) }
 tests['m'] = proc {|x| run_test_mul(x) }
+tests['x'] = proc {|x| run_test_add_scale(x) }
 tests['s'] = proc {|x| run_test_square(x) }
 tests['r'] = proc {|x| run_test_reduce(x) }
 tests['e'] = proc {|x| run_test_expmod(x) }
 tests['g'] = proc {|x| run_test_gcdext(x) }
 init_str = Hash.new
 init_str['a'] = 'add-test'
+init_str['x'] = 'add-scale-test'
 init_str['m'] = 'mul-test'
 init_str['s'] = 'square-test'
 init_str['r'] = 'reduce-test'
diff --git a/mkfiles/bigint.mk b/mkfiles/bigint.mk
index ede274b..076e77d 100644
--- a/mkfiles/bigint.mk
+++ b/mkfiles/bigint.mk
@@ -2,7 +2,7 @@
 ALGO_NAME := BIGINT
 
 # comment out the following line for removement of BigInt from the build process
-#AUX += $(ALGO_NAME)
+AUX += $(ALGO_NAME)
 
 $(ALGO_NAME)_DIR      := bigint/
 $(ALGO_NAME)_INCDIR   := memxor/ noekeon/
diff --git a/test_src/main-bigint-test.c b/test_src/main-bigint-test.c
index 608c91a..4daf319 100644
--- a/test_src/main-bigint-test.c
+++ b/test_src/main-bigint-test.c
@@ -104,12 +104,12 @@ void test_add_scale_bigint(void){
 	for(;;){
 		cli_putstr_P(PSTR("\r\nenter a:"));
 		if(bigint_read_hex_echo(&a)){
-			cli_putstr_P(PSTR("\r\n end add test"));
+			cli_putstr_P(PSTR("\r\n end add-scale test"));
 			return;
 		}
 		cli_putstr_P(PSTR("\r\nenter b:"));
 		if(bigint_read_hex_echo(&b)){
-			cli_putstr_P(PSTR("\r\n end add test"));
+			cli_putstr_P(PSTR("\r\n end add-scale test"));
 			return;
 		}
 		cli_putstr_P(PSTR("\r\nenter scale:"));
@@ -140,8 +140,8 @@ void test_add_scale_bigint(void){
 			free(b.wordv);
 			continue;
 		}
-		bigint_copy(&c, &a);
 		c.wordv = c_b;
+		bigint_copy(&c, &a);
 		bigint_add_scale_u(&c, &b, scale);
 		bigint_print_hex(&c);
 		cli_putstr_P(PSTR("\r\n"));
-- 
2.39.2