X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=bigint%2Fbigint_asm.S;h=c9260571f38d1fe50b7c97cdaa5c35a16bb13123;hp=a91ddad86530ccd4cbe7c97b50e8e57d4ce44718;hb=1f6be6ce98d01772fbb1f4d5205a13bea896a1f1;hpb=0ce5b31e9772c15cd1514e371a59f04e3a888111

diff --git a/bigint/bigint_asm.S b/bigint/bigint_asm.S
index a91ddad..c926057 100644
--- a/bigint/bigint_asm.S
+++ b/bigint/bigint_asm.S
@@ -1,7 +1,7 @@
 /* bigint_asm.S */
 /*
     This file is part of the ARM-Crypto-Lib.
-    Copyright (C) 2006-2010  Daniel Otte (daniel.otte@rub.de)
+    Copyright (C) 2006-2015  Daniel Otte (bg@nerilex.org)
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 @@
 /******************************************************************************/
 /*
-void bigint_add_scale_u(bigint_t* dest, const bigint_t* a, uint16_t scale){
+void bigint_add_scale_u(bigint_t *dest, const bigint_t *a, uint16_t scale){
 	uint16_t i,j=0;
 	uint16_t t=0;
 	if(scale>dest->length_B)
@@ -66,102 +66,7 @@ TMP_0 = 10
 TMP_1 = 11
 
 .global bigint_add_scale_u
-#if 0
-bigint_add_scale_u:
-	push_range 6, 11
-	movw r30, r24 /* dest ptr */
-	movw r26, r22 /* src ptr */
-	movw r24, r20 /* scale */
-	movw DST_CTX_0, r30
-	movw SRC_CTX_0, r26
-	movw SCALE_0, r24
-	/* pad dst with zeros if scale > dst_length */
-	ld DST_SIZE_0, Z+
-	ld DST_SIZE_1, Z+
-	sub r24, DST_SIZE_0
-	sbc r25, DST_SIZE_1
-	ldd TMP_0, Z+1 /* load tmp with DST_WORDV */
-	ldd TMP_1, Z+2
-	movw r30, TMP_0
-	brmi 20f /* branch if DST_SIZE > scale */
-	add r30, DST_SIZE_0
-	adc r31, DST_SIZE_1
-10:
-	sbiw r24, 1
-	brmi 25f
-	st Z+, r1
-	rjmp 10b
-20:
-	add r30, r20 /* add scale to DST_WORDV */
-	adc r31, r21
-	/* add src to dest until one of the two ends */
-25:
-	ld SRC_SIZE_0, X+
-	ld SRC_SIZE_1, X+
-	adiw r26, 1
-	ld TMP_0, X+ /* load tmp with SRC_WORDV */
-	ld TMP_1, X
-	movw r26, TMP_0
-	movw r24, SRC_SIZE_0
-	add r24, SCALE_0
-	adc r25, SCALE_1
-	clt
-	cp r24, DST_SIZE_0
-	cpc r25, DST_SIZE_1
-	brlo 30f
-	set
-	movw r24, DST_SIZE_0
-30:
-	adiw r24, 0
-	breq 35f
-	inc r25
-	clc
-31:
-	ld TMP_0, X+
-	ld TMP_1, Z
-	adc TMP_1, TMP_0
-	st Z+, TMP_1
-	dec r24
-	brne 31b
-	dec r25
-	brne 31b
-35:
-	rol TMP_1
-	brts 40f
-	/* dst is longer than src+scale */
-	ror TMP_1
-38:
-	ld TMP_0, Z
-	adc TMP_0, r1
-	st Z+, TMP_0
-	brcs 38b
-	rjmp 90f
-40:
-	/* dst is shorter than src+scale */
-	movw r24, SRC_SIZE_0
-	sub r24, DST_SIZE_0
-	sbc r25, DST_SIZE_1
-	add r24, SCALE_0
-	adc r25, SCALE_1
-	adiw r24, 0
-	breq 90f
-	inc r25
-	ror TMP_1
-45:
-	ld TMP_0, X+
-	adc TMP_0, r1
-	st Z+, TMP_0
-	dec r24
-	brne 45b
-	dec r25
-	brne 45b
-90:
-	movw r24, DST_CTX_0
-	pop_range 6, 11
-	rjmp bigint_adjust
-
-#endif
 
 /******************************************************************************/
 
 /******************************************************************************/
@@ -173,25 +78,34 @@ SRC_LEN_0 = 20
 SRC_LEN_1 = 21
 SCALE_0 = 18
 SCALE_1 = 19
-DST_CTX_0 = 6
-DST_CTX_1 = 7
-SRC_CTX_0 = 8
-SRC_CTX_1 = 9
+DST_CTX_0 = 8
+DST_CTX_1 = 9
 TMP_0 = 10
 TMP_1 = 11
 
 bigint_add_scale_u:
-	push_range 6, 11
 	movw r30, r24 /* dest ptr */
 	movw r26, r22 /* src ptr */
 	movw r24, r20 /* scale */
-	movw DST_CTX_0, r30
-	movw SRC_CTX_0, r26
+	/* check if scale is zero */
 	movw SCALE_0, r24
+	adiw r24, 0
+	brne 10f
+	movw r24, r30
+	movw r20, r30
+	rjmp bigint_add_u
+10: /* check if src is zero */
+	ld r24, X+
+	ld r25, X+
+	adiw r24, 0
+	brne 10f
+	ret
+10:
+	movw SRC_LEN_0, r24
+	push_range 8, 11
+	movw DST_CTX_0, r30
 	/*
 	pad dest with zeros to length of SRC_LENGTH + scale */
-	ld SRC_LEN_0, X+
-	ld SRC_LEN_1, X+
 	adiw r26, 1
 	ld TMP_0, X+
 	ld TMP_1, X+
@@ -216,7 +130,6 @@ bigint_add_scale_u:
 	rjmp 10b
 11:
 	/* start of copy */
-	movw r24, SRC_LEN_0
 12:
 	/* copy loop */
 
@@ -234,13 +147,12 @@ bigint_add_scale_u:
 	std Z+1, TMP_1
 	movw r24, r30
 99:
-	pop_range 6, 11
+	pop_range 8, 11
 	rjmp bigint_adjust
 40:
-	/* TODO */
 	/* Z points at DST_WORDV */
 	/* X points at SRC_WORDV */
-	/* r24:r25 contains scale - DST_LEN (negativ) */
+	/* r24:r25 and TMP contain scale - DST_LEN (negativ) */
 	/* set T bit if DST_LEN > SCR_LEN + scale */
 	clt
 	add r30, SCALE_0
@@ -262,36 +174,24 @@ bigint_add_scale_u:
 	+-------+-------+	SRC + scale
 	+------------+		DST
 	*/
-	com r24 /* negate r24:r25 */
+	com r24 /* negate r24:r25 ==> DST_LEN - scale */
 	com r25
 	adiw r24, 1
+	breq 50f
 44:
+	inc r25
 	clc
 45:
-	dec r24
-	brpl 46f
-	dec r25
-	brmi 50f
 46:
 	ld TMP_0, X+
 	ld TMP_1, Z
 	adc TMP_0, TMP_1
 	st Z+, TMP_0
-	rjmp 45b
+	dec r24
+	brne 46b
+	dec r25
+	brne 46b
-50:
-	/* do the overhaning part */
-	rol r1
-	movw r24, r30
-	movw r30, DST_CTX_0
-	ldd TMP_0, Z+3
-	ldd TMP_1, Z+4
-	movw r30, TMP_0
-	add r30, DST_LEN_0
-	adc r31, DST_LEN_1
-	adiw r30, 1
-	st Z, r1
-	movw r30, r24
-	ror r1
+50: ;st Z, r1
 	brtc 60f
 51: brcc 53f
 52: ld TMP_0, Z
@@ -299,34 +199,42 @@ bigint_add_scale_u:
 	st Z+, TMP_0
 	brcs 52b
 53:
-	/* TODO */
+	/* epilogue */
 	movw r24, r30
 	movw r30, DST_CTX_0
 	ldd TMP_0, Z+3
 	ldd TMP_1, Z+4
 	sub r24, TMP_0
 	sbc r25, TMP_1
+	cp r24, DST_LEN_0
+	cpc r25, DST_LEN_1
+	brmi 54f
 	std Z+0, r24
 	std Z+1, r25
-	movw r24, r30
+54:	movw r24, r30
 	rjmp 99b
-60:	rol r1 /* backup carry */
+60:	st Z, r1
+	rol r1 /* backup carry */
 	movw r24, SRC_LEN_0
 	add r24, SCALE_0
 	adc r25, SCALE_1
 	sub r24, DST_LEN_0
 	sbc r25, DST_LEN_1
+
+	adiw r24, 0
+	breq 63f
+	inc r25
 	ror r1 /* restore carry */
-61:	dec r24
-	brpl 62f
-	dec r25
-	brmi 63f
+61:
 62:	ld TMP_0, X+
 	adc TMP_0, r1
 	st Z+, TMP_0
-	rjmp 61b
+	dec r24
+	brne 62b
+	dec r25
+	brne 62b
 63:	brcc 53b
 	ldi r24, 1
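
Both the removed #if 0 block and the reworked routine above implement the same operation: dest is replaced by dest + a * 256^scale, i.e. the source operand is added into dest shifted left by `scale` whole bytes, with a final tail-jump into bigint_adjust to fix up the length. Below is a minimal C sketch of that semantics, assuming a bigint_t with a byte count length_B (named in the C fragment quoted in the file's header comment) and a little-endian byte vector wordv (suggested by the DST_WORDV/SRC_WORDV comments in the assembly); the struct definition and the _ref name are illustrative only, not the library's own code, and dest->wordv is assumed large enough to hold the grown result.

#include <stdint.h>

/* Illustrative stand-in for the library's bigint_t (assumed layout). */
typedef struct {
	uint16_t length_B;  /* length in bytes */
	uint8_t  info;
	uint8_t *wordv;     /* little-endian byte vector */
} bigint_t;

/* Reference semantics: dest += a * 256^scale, done byte-wise with carry.
 * dest->wordv must have room for the result plus a possible carry byte. */
static void bigint_add_scale_u_ref(bigint_t *dest, const bigint_t *a, uint16_t scale)
{
	uint16_t i, j;
	uint16_t t = 0;                       /* byte accumulator incl. carry */

	/* phase 1: zero-pad dest up to `scale` bytes if it is shorter */
	for (i = dest->length_B; i < scale; ++i) {
		dest->wordv[i] = 0;
	}

	/* phase 2: add a into dest starting at byte offset `scale`,
	 * phase 3: keep going while a carry is left to propagate */
	for (j = 0, i = scale; j < a->length_B || t; ++j, ++i) {
		t += (i < dest->length_B) ? dest->wordv[i] : 0;
		t += (j < a->length_B)    ? a->wordv[j]    : 0;
		dest->wordv[i] = (uint8_t)t;
		t >>= 8;
	}
	if (i > dest->length_B) {
		dest->length_B = i;           /* the assembly then runs bigint_adjust */
	}
}

The same three phases are roughly visible in the new assembly: the zero-padding loop after the "pad dest with zeros" comment, the copy/add loops around labels 11/12 and 44..46, and the carry-propagation tail from label 50 onward.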