X-Git-Url: https://git.cryptolib.org/?a=blobdiff_plain;f=seed-asm.S;h=84866c382098f266ed06bbb318bec5b2035d123e;hb=e5a49deb52521f019e37779d6e9d81ec4f02aba4;hp=bf9c840c515e64226c99983b673c46ef98d1f08f;hpb=e363148c4ed1265a963d310102ce5dd7c9e1e326;p=avr-crypto-lib.git

diff --git a/seed-asm.S b/seed-asm.S
index bf9c840..84866c3 100644
--- a/seed-asm.S
+++ b/seed-asm.S
@@ -1,6 +1,6 @@
 /* seed-asm.S */
 /*
-    This file is part of the Crypto-avr-lib/microcrypt-lib.
+    This file is part of the AVR-Crypto-Lib.
     Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
 
     This program is free software: you can redistribute it and/or modify
@@ -27,36 +27,6 @@
  */
 #include "avr-asm-macros.S"
 
-.global bigendian_sum32
-; === bigendian_sum32 ===
-; function that adds two 32-bit words in the bigendian way and returns the result
-;  param1: the first 32-bit word
-;	given in r25,r24,r23,22 (r25 is most significant for little endian)
-;  param2: the second 32-bit word
-;	given in r21,r20,r19,18 (r21 is most significant for little endian)
-;  modifys: 
-bigendian_sum32:
-	add r25, r21
-	adc r24, r20
-	adc r23, r19
-	adc r22, r18
-	ret
-	
-.global bigendian_sub32
-; === bigendian_sub32 ===
-; function that subtracts a 32-bit words from another in the bigendian way and returns the result
-;  param1: the minuend 32-bit word
-;	given in r25,r24,r23,22 (r25 is most significant for little endian)
-;  param2: the subtrahend 32-bit word
-;	given in r21,r20,r19,18 (r21 is most significant for little endian)
-;  modifys: 
-bigendian_sub32:
-	sub r25, r21
-	sbc r24, r20
-	sbc r23, r19
-	sbc r22, r18
-	ret
-
 /******************************************************************************/	
 /*
 #define M0 0xfc
@@ -112,8 +82,7 @@ T3 = X1
  *  X2 = R23
  *  X3 = R22
  */    
-.global g_function
-g_function:
+seed_g_function:
 	ldi r30, lo8(seed_sbox1)
 	ldi r31, hi8(seed_sbox1)
  	movw r26, r30
@@ -249,8 +218,740 @@ seed_sbox2:
 .byte    55,  231,   36,  164,  203,   83,   10,  135 
 .byte   217,   76,  131,  143,  206,   59,   74,  183 
 
+/******************************************************************************/
+
+/*
+static
+uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){
+	uint32_t c,d;
+
+	c = *a & 0x00000000FFFFFFFFLL;
+	d = (*a>>32) & 0x00000000FFFFFFFFLL;
+	
+	c ^= k0; d ^= k1;
+	d ^= c;
+	d = g_function(d);
+	c = bigendian_sum32(c,d);
+	c = g_function(c);
+	d = bigendian_sum32(c,d);
+	d = g_function(d);
+	c = bigendian_sum32(c,d);	
+	return ((uint64_t)d << 32) | c;
+}
+*/
+/* seed_f_function: assembly version of the f_function C reference above.
+ * param a   r24:r25  pointer to 8 bytes; bytes 0..3 are c, bytes 4..7 are d
+ * param k0  r20:r23  XORed into c (r20 pairs with byte 0)
+ * param k1  r16:r19  XORed into d (r16 pairs with byte 4)
+ * result: c in r18..r21, d in r22..r25; the 32-bit sums carry from byte 3 up to byte 0 */
+D0 = 10	/* D0..D3: register numbers used to hold the d word */
+D1 = 11
+C0 = 12	/* C0..C3: register numbers used to hold the c word */
+C1 = 13
+C2 = 14
+C3 = 15
+D2 = 16	/* r16/r17 still carry k1 on entry; k1 is consumed before D2/D3 are written */
+D3 = 17
+seed_f_function:
+	push_range 10, 17	/* macro from avr-asm-macros.S; presumably pushes r10..r17 - confirm there */
+	movw r30, r24		/* Z = a */
+	ld C0, Z+		/* c = bytes 0..3 at a */
+	ld C1, Z+
+	ld C2, Z+
+	ld C3, Z+
+	eor C0, r20		/* c ^= k0 */
+	eor C1, r21
+	eor C2, r22
+	eor C3, r23
+	ld r22, Z+		/* r22..r25 = d = bytes 4..7 at a (k0 in r22/r23 is no longer needed) */
+	ld r23, Z+
+	ld r24, Z+
+	ld r25, Z+
+	eor r22, r16		/* d ^= k1 */
+	eor r23, r17
+	eor r24, r18
+	eor r25, r19
+	eor r22, C0		/* d ^= c */
+	eor r23, C1
+	eor r24, C2
+	eor r25, C3
+	rcall seed_g_function	/* d = g(d); argument and result are both taken in r22..r25 */
+	mov D0, r22		/* keep d; assumes seed_g_function leaves r10..r17 alone - TODO confirm */
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25
+
+	add r25, C3		/* r22..r25 = c + d; the carry runs from r25 (byte 3) up to r22 (byte 0) */
+	adc r24, C2
+	adc r23, C1
+	adc r22, C0
+	rcall seed_g_function	/* c = g(c + d) */
+	mov C0, r22
+	mov C1, r23
+	mov C2, r24
+	mov C3, r25
+
+	add r25, D3		/* r22..r25 = c + d */
+	adc r24, D2
+	adc r23, D1
+	adc r22, D0
+	rcall seed_g_function	/* d = g(c + d); d stays in r22..r25 until the return */
+	mov D0, r22		/* NOTE(review): D0..D3 are not read again before pop_range; these four moves look redundant */
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25
+
+	add C3, r25		/* c = c + d */
+	adc C2, r24
+	adc C1, r23
+	adc C0, r22
+
+	mov r18, C0		/* return value: c in r18..r21, d already in r22..r25 */
+	mov r19, C1
+	mov r20, C2
+	mov r21, C3
+	
+	pop_range 10, 17	/* counterpart of the push_range at entry */
+	ret
+
+/******************************************************************************/
+/* seed_init: copy 16 bytes from key to the start of ctx. C reference:
+void seed_init(uint8_t * key, seed_ctx_t * ctx){
+	memcpy(ctx->k, key, 128/8);
+}
+   param key: r24:r25, param ctx: r22:r23; changes r0, r22, X and Z */
+
+.global seed_init
+seed_init:
+	movw r26, r24		/* X = key (source) */
+	movw r30, r22		/* Z = ctx (destination) */
+	ldi r22, 16		/* byte counter; r22 is free now that ctx has been copied to Z */
+1:
+	ld r0, X+
+	st Z+, r0
+	dec r22
+	brne 1b			/* loop until all 16 bytes are copied */
+	ret
+/******************************************************************************/
+/*
+typedef struct {
+	uint32_t k0, k1;
+} keypair_t;
+
+keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){
+	keypair_t ret;
+	if (curround>15){
+		/ * ERROR * /
+		ret.k0 = ret.k1 = 0;
+	} else {
+	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = seed_g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(ret.k1);
+		
+		if (curround & 1){
+			/ * odd round (1,3,5, ...) * /
+			((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/ * even round (0,2,4, ...) * /
+			((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
+		}
+	}
+	return ret;
+}
+*/
+/* compute_keys: compute the key pair k0/k1 for one round from the 16-byte key state.
+ * param keystate: r24:r25  (copied into Y, which is left pointing at keystate)
+ * param curround: r22      (index into seed_kc, 4 bytes per entry; not range-checked)
+ * result: k0 in r18..r21, k1 in r22..r25; r10..r17 and Y are changed and not saved here */
+XRC0 = 10	/* XRC0..XRC3: registers holding the 4 round-constant bytes read from seed_kc */
+XRC1 = 11
+XRC2 = 12
+XRC3 = 13
+D0 = 14	/* D0/D1 are re-assigned here (they were 10/11 for seed_f_function above) */
+D1 = 15
+D2 = 16
+D3 = 17
+
+compute_keys:
+	ldi r30, lo8(seed_kc)	/* Z = seed_kc + 4*curround */
+	ldi r31, hi8(seed_kc)
+	lsl r22
+	lsl r22
+	add r30, r22
+	adc r31, r1		/* assumes r1 == 0, so only the carry is added */
+	lpm XRC0, Z+		/* read the constant from program memory */
+	lpm XRC1, Z+
+	lpm XRC2, Z+
+	lpm XRC3, Z+
+	movw r28, r24		/* Y = keystate */
+	ldd r25, Y+0*4+3	/* r22..r25 = keystate bytes 0..3 (word 0) */
+	ldd r24, Y+0*4+2
+	ldd r23, Y+0*4+1
+	ldd r22, Y+0*4+0
+	
+	ldd r0, Y+2*4+3		/* add word 2 (bytes 8..11); the carry runs from byte 3 up to byte 0 */
+	add r25, r0
+	ldd r0, Y+2*4+2
+	adc r24, r0
+	ldd r0, Y+2*4+1
+	adc r23, r0
+	ldd r0, Y+2*4+0
+	adc r22, r0
+
+	sub r25, XRC3		/* subtract the round constant, same byte order */
+	sbc r24, XRC2
+	sbc r23, XRC1
+	sbc r22, XRC0
+	rcall seed_g_function	/* k0 = g(word0 + word2 - constant); Y and r10..r17 must survive the call */
+	mov D0, r22		/* park k0 while k1 is computed */
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25
+	
 
+	ldd r25, Y+1*4+3	/* r22..r25 = keystate bytes 4..7 (word 1) */
+	ldd r24, Y+1*4+2
+	ldd r23, Y+1*4+1
+	ldd r22, Y+1*4+0
 
+	ldd r0, Y+3*4+3		/* subtract word 3 (bytes 12..15) */
+	sub r25, r0
+	ldd r0, Y+3*4+2
+	sbc r24, r0
+	ldd r0, Y+3*4+1
+	sbc r23, r0
+	ldd r0, Y+3*4+0
+	sbc r22, r0
 
+	add r25, XRC3		/* add the round constant */
+	adc r24, XRC2
+	adc r23, XRC1
+	adc r22, XRC0
+	rcall seed_g_function	/* k1 = g(word1 - word3 + constant), left in r22..r25 */
 
+	mov r21, D3		/* k0 -> r18..r21 */
+	mov r20, D2
+	mov r19, D1
+	mov r18, D0 
+	ret
+
+seed_getnextkeys:	/* in: keystate r24:r25, curround r22; out: k0 r18..r21, k1 r22..r25; then rotates half of the key state by one byte */
+	push_range 10, 17	/* compute_keys overwrites r10..r17; macro presumably pushes them - confirm in avr-asm-macros.S */
+	push r28
+	push r29
+;	andi r22, 0x0F
+	bst r22,0		/* T = bit 0 of curround (1 = odd round); read again after the call */
+	rcall compute_keys		/* leaves Y = keystate; T must survive this call (and seed_g_function inside it) */
+	brtc even_round
+odd_round:
+
+	adiw r28, 8		/* odd round: rotate bytes 8..15 one position toward lower addresses */
+	ld r26, Y		/* byte 8 is saved and re-enters at byte 15 */
+	ldd r0, Y+1
+	std Y+0, r0
+	ldd r0, Y+2
+	std Y+1, r0
+	ldd r0, Y+3
+	std Y+2, r0
+	ldd r0, Y+4
+	std Y+3, r0
+	ldd r0, Y+5
+	std Y+4, r0
+	ldd r0, Y+6
+	std Y+5, r0
+	ldd r0, Y+7
+	std Y+6, r0
+	std Y+7, r26	
+/* disabled loop variant of the same rotation:
+	movw r30, r28
+	ld r26, Z+
+	ldi r27, 7
+1:
+	ld r0, Z+
+	st Y+, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/	
+	rjmp 4f
+
+even_round:
+
+	ldd r26, Y+7		/* even round: rotate bytes 0..7 one position toward higher addresses */
+	ldd r0, Y+6		/* byte 7 (saved in r26) re-enters at byte 0 */
+	std Y+7, r0
+	ldd r0, Y+5
+	std Y+6, r0
+	ldd r0, Y+4
+	std Y+5, r0
+	ldd r0, Y+3
+	std Y+4, r0
+	ldd r0, Y+2
+	std Y+3, r0
+	ldd r0, Y+1
+	std Y+2, r0
+	ldd r0, Y+0
+	std Y+1, r0
+	std Y+0, r26
+/* disabled loop variant of the same rotation:
+	adiw r28, 7	
+	ld r26, Y
+	ldi r27, 7	
+1:
+	ld r0, -Y
+	std Y+1, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/
+4:	
+	pop r29			/* restore Y and r10..r17; the key pair in r18..r25 is untouched */
+	pop r28
+	pop_range 10, 17
+	ret
+
+/******************************************************************************/
+/*
+keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
+	keypair_t ret;
+	if (curround>15){
+		/ * ERROR * /
+		ret.k0 = ret.k1 = 0;
+	} else {
+		if (curround & 1){
+			/ * odd round (1,3,5, ..., 15) * /
+			((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/ * even round (0,2,4, ..., 14) * /
+			((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
+		}
+	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = seed_g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(ret.k1);
+		}
+	return ret;
+}
+*/
+/* seed_getprevkeys: rotate half of the key state by one byte, then compute the key pair.
+ * param keystate: r24:r25
+ * param curround: r22
+ * result: k0 in r18..r21, k1 in r22..r25 (as left by compute_keys) */
+
+seed_getprevkeys:
+	push_range 10, 17	/* compute_keys overwrites r10..r17; macro presumably pushes them - confirm in avr-asm-macros.S */
+	push r28
+	push r29
+	movw r28, r24		/* Y = keystate; r24:r25 and r22 stay intact for compute_keys below */
+;	andi r22, 0x0F
+	bst r22, 0		/* T = bit 0 of curround (1 = odd round) */
+	brts r_odd_round
+r_even_round:
+	ldd r26, Y+0		/* even round: rotate bytes 0..7 one position toward lower addresses */
+	ldd r0, Y+1		/* byte 0 (saved in r26) re-enters at byte 7 */
+	std Y+0, r0
+	ldd r0, Y+2
+	std Y+1, r0
+	ldd r0, Y+3
+	std Y+2, r0
+	ldd r0, Y+4
+	std Y+3, r0
+	ldd r0, Y+5
+	std Y+4, r0
+	ldd r0, Y+6
+	std Y+5, r0
+	ldd r0, Y+7
+	std Y+6, r0
+	std Y+7, r26	
+/* disabled loop variant of the same rotation:
+	movw r30, r28
+	ld r26, Z+
+	ldi r27, 7
+1:
+	ld r0, Z+
+	st Y+, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/	
+
+	rjmp 4f
+r_odd_round:
+	ldd r26, Y+8+7		/* odd round: rotate bytes 8..15 one position toward higher addresses */
+	ldd r0, Y+8+6		/* byte 15 (saved in r26) re-enters at byte 8 */
+	std Y+8+7, r0
+	ldd r0, Y+8+5
+	std Y+8+6, r0
+	ldd r0, Y+8+4
+	std Y+8+5, r0
+	ldd r0, Y+8+3
+	std Y+8+4, r0
+	ldd r0, Y+8+2
+	std Y+8+3, r0
+	ldd r0, Y+8+1
+	std Y+8+2, r0
+	ldd r0, Y+8+0
+	std Y+8+1, r0
+	std Y+8+0, r26
+/* disabled loop variant (written with offsets for the first half; not adjusted for bytes 8..15):
+	adiw r28, 7	
+	ld r26, Y
+	ldi r27, 7	
+1:
+	ld r0, -Y
+	std Y+1, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/
+4:
+	rcall compute_keys	/* keys are derived from the already rotated state */
+
+	pop r29			/* restore Y and r10..r17; the key pair in r18..r25 is untouched */
+	pop r28
+	pop_range 10, 17
+	ret
+
+/******************************************************************************/
+
+seed_kc:	/* 16 round constants, 4 bytes each; read with lpm by compute_keys, indexed by curround */
+.long   0xb979379e 	/* compute_keys treats the first byte in memory as the most significant one; */
+.long   0x73f36e3c	/* presumably .long stores the low byte first, hence the swapped notation - confirm */
+.long   0xe6e6dd78 
+.long   0xcccdbbf1 
+.long   0x999b77e3 
+.long   0x3337efc6 
+.long   0x676ede8d 
+.long   0xcfdcbc1b 
+.long   0x9eb97937
+.long   0x3c73f36e	
+.long   0x78e6e6dd
+.long   0xf1cccdbb
+.long   0xe3999b77
+.long   0xc63337ef
+.long   0x8d676ede
+.long   0x1bcfdcbc
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_enc(void * buffer, seed_ctx_t * ctx){
+	uint8_t r;
+	keypair_t k;
+	for(r=0; r<8; ++r){
+			k = seed_getnextkeys(ctx->k, 2*r);
+/ *
+	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+			L ^= seed_f_function(&R,k.k0,k.k1);
+			
+			k = seed_getnextkeys(ctx->k, 2*r+1);
+/ *
+	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+			R ^= seed_f_function(&L,k.k0,k.k1);
+	}
+	/ * just an exchange without temp. variable * /
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+*/
+/* seed_enc: in-place transform of the 16-byte buffer, following the seed_enc C reference above.
+ * param buffer: r24:r25  (L = bytes 0..7, R = bytes 8..15)
+ * param ctx:    r22:r23  (handed to seed_getnextkeys as keystate, which rotates it)
+ */
+CTR  = 9	/* r9: loop counter r, 0..7 */
+xLPTR = 10	/* r10:r11: pointer to L */
+xRPTR = 12	/* r12:r13: pointer to R */
+CPTR = 14	/* r14:r15: pointer to ctx */
+
+.global seed_enc
+seed_enc:
+	push_range 9, 17	/* macro from avr-asm-macros.S; presumably pushes r9..r17 - confirm there */
+	push r28
+	push r29	
+	clr CTR
+	movw xLPTR, r24
+	adiw r24, 8
+	movw xRPTR, r24		
+	movw CPTR, r22
+1:
+	movw r28, xLPTR		/* Y = L; Y is expected to survive both calls below */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22			/* curround = 2*r */
+	rcall seed_getnextkeys	/* k0 in r18..r21, k1 in r22..r25 */
+
+	/* shuffle for seed_f_function: k0 (r18..r21) -> r20..r23, k1 (r22..r25) -> r16..r19 */
+	movw r16, r22		/* r16:r17 = k1 bytes 0,1 */
+	movw r22, r18		/* r22:r23 = k0 bytes 0,1 (temporary) */
+	movw r18, r24		/* r18:r19 = k1 bytes 2,3 */
+	movw r24, r20		/* r24:r25 = k0 bytes 2,3 (temporary) */
+	movw r20, r22		/* r20:r21 = k0 bytes 0,1 */
+	movw r22, r24		/* r22:r23 = k0 bytes 2,3 */
+	movw r24, xRPTR		/* first argument: pointer to R */
+
+	rcall seed_f_function	/* 8 result bytes in r18..r25 */
+
+	ld r0, Y		/* L ^= result: XOR r18..r25 into the 8 bytes at Y */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	/* second half: Y now points at R; same steps with curround = 2*r+1 and f applied to L */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22
+	inc r22			/* curround = 2*r + 1 */
+	rcall seed_getnextkeys	
+
+	movw r16, r22		/* same register shuffle as in the first half */
+	movw r22, r18
+	movw r18, r24
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24
+	movw r24, xLPTR		/* first argument: pointer to L */
+	
+	rcall seed_f_function
+
+	ld r0, Y		/* R ^= result */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	
+	inc CTR
+	bst CTR, 3		/* T = 1 once CTR has reached 8 */
+	brts 3f
+	rjmp 1b			/* loop body is too long for a conditional branch back, hence brts + rjmp */
+3:
+	movw r28, xLPTR		/* final step: swap the 8 bytes of L with the 8 bytes of R */
+	movw r30, xRPTR
+	ldi r17, 8
+4:
+	ld r10, Y		/* r10/r11 are reused as scratch; the pointers were copied to Y and Z above */
+	ld r11, Z
+	st Z+, r10
+	st Y+, r11
+	dec r17
+	brne 4b
+5:
+	pop r29
+	pop r28
+	pop_range 9, 17
+	ret
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_dec(void * buffer, seed_ctx_t * ctx){
+	int8_t r;
+	keypair_t k;
+	for(r=7; r>=0; --r){
+			k = seed_getprevkeys(ctx->k, 2*r+1);
+/ *
+	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+			L ^= seed_f_function(&R,k.k0,k.k1);
+			
+			k = seed_getprevkeys(ctx->k, 2*r+0);
+/ *
+	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+			R ^= seed_f_function(&L,k.k0,k.k1);
+	}
+	/ * just an exchange without temp. variable * /
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+*/
+/* seed_dec: in-place transform of the 16-byte buffer, following the seed_dec C reference above.
+ * param buffer: r24:r25  (L = bytes 0..7, R = bytes 8..15)
+ * param ctx:    r22:r23  (handed to seed_getprevkeys as keystate, which rotates it)
+ */
+CTR  = 9	/* r9: loop counter r, 7 down to 0 */
+xLPTR = 10	/* r10:r11: pointer to L */
+xRPTR = 12	/* r12:r13: pointer to R */
+CPTR = 14	/* r14:r15: pointer to ctx */
+
+.global seed_dec
+seed_dec:
+	push_range 9, 17	/* macro from avr-asm-macros.S; presumably pushes r9..r17 - confirm there */
+	push r28
+	push r29	
+	ldi r16, 7		/* ldi needs r16..r31, so the start value goes through r16 */
+	mov CTR, r16
+	movw xLPTR, r24
+	adiw r24, 8
+	movw xRPTR, r24		
+	movw CPTR, r22
+1:
+	movw r28, xLPTR		/* Y = L; Y is expected to survive both calls below */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22
+	inc r22			/* curround = 2*r + 1 */
+	rcall seed_getprevkeys	/* k0 in r18..r21, k1 in r22..r25 */
+
+	/* shuffle for seed_f_function: k0 (r18..r21) -> r20..r23, k1 (r22..r25) -> r16..r19 */
+	movw r16, r22		/* r16:r17 = k1 bytes 0,1 */
+	movw r22, r18		/* r22:r23 = k0 bytes 0,1 (temporary) */
+	movw r18, r24		/* r18:r19 = k1 bytes 2,3 */
+	movw r24, r20		/* r24:r25 = k0 bytes 2,3 (temporary) */
+	movw r20, r22		/* r20:r21 = k0 bytes 0,1 */
+	movw r22, r24		/* r22:r23 = k0 bytes 2,3 */
+	movw r24, xRPTR		/* first argument: pointer to R */
+
+	rcall seed_f_function	/* 8 result bytes in r18..r25 */
+
+	ld r0, Y		/* L ^= result: XOR r18..r25 into the 8 bytes at Y */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	/* second half: Y now points at R; same steps with curround = 2*r and f applied to L */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22			/* curround = 2*r */
+	rcall seed_getprevkeys	
+
+	movw r16, r22		/* same register shuffle as in the first half */
+	movw r22, r18
+	movw r18, r24
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24
+	movw r24, xLPTR		/* first argument: pointer to L */
+	
+	rcall seed_f_function
+
+	ld r0, Y		/* R ^= result */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	
+	dec CTR
+	brmi 3f			/* leave once CTR has gone below zero, i.e. after r = 0 was processed */
+	rjmp 1b			/* loop body is too long for a conditional branch back, hence brmi + rjmp */
+3:
+	movw r28, xLPTR		/* final step: swap the 8 bytes of L with the 8 bytes of R */
+	movw r30, xRPTR
+	ldi r17, 8
+4:
+	ld r10, Y		/* r10/r11 are reused as scratch; the pointers were copied to Y and Z above */
+	ld r11, Z
+	st Z+, r10
+	st Y+, r11
+	dec r17
+	brne 4b
+5:
+	pop r29
+	pop r28
+	pop_range 9, 17
+	ret