From: bg <bg@nerilex.org>
Date: Mon, 20 Jul 2015 07:44:30 +0000 (+0200)
Subject: new and more compact aes
X-Git-Url: https://git.cryptolib.org/?a=commitdiff_plain;ds=inline;p=avr-crypto-lib.git

new and more compact aes
---

diff --git a/aes/aes_aleph_enc-asm.S b/aes/aes_aleph_enc-asm.S
new file mode 100644
index 0000000..ddc7000
--- /dev/null
+++ b/aes/aes_aleph_enc-asm.S
@@ -0,0 +1,185 @@
+/* aes_aleph_enc-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file     aes_aleph_enc-asm.S
+ * \email    bg@nerilex.org
+ * \author   Daniel Otte 
+ * \date     2009-01-10
+ * \license  GPLv3 or later
+ * 
+ */
+
+#include "avr-asm-macros.S"
+
+
+xtime: /* r24 = 2*r24 in GF(2^8): shift left, then fold an overflow back in; expects r27 = 0x1B */
+	lsl r24 /* carry = old bit 7 */
+	brcc 1f /* no overflow: the shifted value is already the result */
+	eor r24, r27 /* overflow: reduce with the constant the caller placed in r27 */
+1:
+	ret
+
+shift_offset_table: /* read with lpm by sub_shift_bytes: state offset at which each popped byte is stored, in pop order */
+	.byte 12,  8,  4,  0 /* bytes 12, 8, 4, 0 are written back in place */
+	.byte  9,  5,  1, 13 /* bytes 13, 9, 5, 1: each moves 4 offsets down, byte 1 wraps around to 13 */
+	.byte  6,  2, 14, 10 /* bytes 14, 10, 6, 2: each moves 8 offsets, with wrap-around */
+	.byte  3, 15, 11,  7 /* bytes 15, 11, 7, 3: each moves 4 offsets up, byte 15 wraps around to 3 */
+
+.global aes256_enc
+aes256_enc: /* AES-256 entry: r24:r25 and r22:r23 are passed through to aes_encrypt_core, round count fixed to 14 */
+	ldi r20, 14
+	rjmp aes_encrypt_core
+
+.global aes192_enc
+aes192_enc: /* AES-192 entry: r24:r25 and r22:r23 are passed through, round count fixed to 12 */
+	ldi r20, 12
+	rjmp aes_encrypt_core
+
+.global aes128_enc
+aes128_enc: /* AES-128 entry: r24:r25 and r22:r23 are passed through, round count fixed to 10 */
+	ldi r20, 10 /* no jump here: execution falls through into aes_encrypt_core, which must follow directly */
+
+/*
+  void aes_encrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
+  Encrypts the 16 bytes at state in place, using rounds+1 consecutive 16-byte round keys starting at ks.
+  NOTE(review): memxor and aes_sbox are defined elsewhere; this code assumes memxor preserves r3, Y and the T flag, and that the low address byte of aes_sbox is 0.
+ * param state:  r24:r25 (held in Y for the whole run)
+ * param ks:     r22:r23 (moved on to the next round key after every key addition)
+ * param rounds: r20 (counted down in r3; the column mixing is skipped in the last round)
+ */
+
+.global aes_encrypt_core
+aes_encrypt_core:
+	push r3 /* r3, r16, r17 and Y are modified below, so they are saved here */
+	push r16
+	push r17
+	push r28
+	push r29
+	mov r3, r20 /* r3 = rounds still to do */
+	clt /* T = 0: the last round has not been reached */
+	movw r28, r24 /* Y = state */
+x: /* re-entry point after each round */
+	movw r24, r28 /* first memxor argument = state */
+key_add: /* add the current round key to the state */
+
+	clr r21
+	ldi r20, 16 /* r21:r20 = 16, the size of the state and of one round key */
+	call memxor /* external helper; arguments are state (r24:r25), round key (r22:r23), 16 */
+	movw r22, r26 /* switch to next roundkey; r26 points after the end of src after memxor ;-) */
+
+	brtc sub_shift_bytes /* T clear: another round follows */
+4: /* T set: the final round key has been added, restore and return */
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r3
+	ret
+
+sub_shift_bytes: /* permute the state bytes according to shift_offset_table (substitution happens in mix_rows) */
+	ldi r30, lo8(shift_offset_table)
+	ldi r31, hi8(shift_offset_table) /* Z = table of destination offsets, read with lpm */
+	ldi r20, 4   /* load counter for columns (rows in spec) */
+	movw r24, r28 /* r25:r24 = state base, used to form the store addresses */
+1:
+	ldi r21, 4
+2:
+	ld r16, Y /* read every 4th byte of the state ... */
+	adiw r28, 4
+	push r16 /* ... and park it on the stack */
+	dec r21
+	brne 2b
+
+	ldi r21, 4
+2:
+	pop r16 /* the four bytes come back in reverse order */
+	movw r26, r24
+	lpm r0, Z+ /* r0 = destination offset for this byte */
+	add r26, r0
+	adc r27, r1 /* carry propagation only; r1 is assumed to hold 0 */
+	st X, r16 /* state[offset] = byte */
+	dec r21
+	brne 2b
+
+	sbiw r28, 15 /* the reads advanced Y by 16; net effect is Y += 1 for the next pass */
+
+	dec r20
+	brne 1b
+
+	sbiw r28, 4 /* set Y back to the start of state */
+
+	dec r3 /* one round less to go */
+	brne mix_rows
+	set /* r3 reached 0: this is the last round, remember that in T */
+
+mix_rows: /* substitute all 16 bytes through aes_sbox and, unless T is set, mix each group of 4 bytes */
+	ldi r31, hi8(aes_sbox) /* high byte of Z; the low byte (r30) is the state byte itself */
+	ldi r27, 0x1B /* constant that xtime XORs in on overflow */
+	ldi r20, 4 /* four groups of 4 consecutive state bytes */
+1:
+	ldd r30, Y+0
+	lpm r16, Z /* r16..r19 = substituted bytes 0..3 of this group */
+	ldd r30, Y+1
+	lpm r17, Z
+	ldd r30, Y+2
+	lpm r18, Z
+	ldd r30, Y+3
+	lpm r19, Z
+
+	brts 2f /* last round: store the substituted bytes without mixing */
+	mov r26, r16 /* keep the original r16 for the wrap-around term of r19 */
+
+	mov r24, r16
+	eor r24, r17
+
+	mov r21, r24
+	eor r21, r18
+	eor r21, r19 /* r21 = r16 ^ r17 ^ r18 ^ r19 (all four original bytes) */
+
+	rcall xtime
+	eor r16, r24
+	eor r16, r21 /* r16 ^= xtime(r16 ^ r17) ^ r21 */
+
+	mov r24, r17
+	eor r24, r18
+	rcall xtime
+	eor r17, r24
+	eor r17, r21 /* r17 ^= xtime(r17 ^ r18) ^ r21 */
+
+	mov r24, r18
+	eor r24, r19
+	rcall xtime
+	eor r18, r24
+	eor r18, r21 /* r18 ^= xtime(r18 ^ r19) ^ r21 */
+
+	mov r24, r19
+	eor r24, r26
+	rcall xtime
+	eor r19, r24
+	eor r19, r21 /* r19 ^= xtime(r19 ^ original r16) ^ r21 */
+2:
+	st Y+, r16 /* write the group back and advance Y to the next one */
+	st Y+, r17
+	st Y+, r18
+	st Y+, r19
+	dec r20
+	brne 1b
+	sbiw r28, 16 /* Y back to the start of state */
+	rjmp x /* add the next round key */
+
diff --git a/aes/aes_aleph_keyschedule-asm.S b/aes/aes_aleph_keyschedule-asm.S
new file mode 100644
index 0000000..57b3c5c
--- /dev/null
+++ b/aes/aes_aleph_keyschedule-asm.S
@@ -0,0 +1,207 @@
+/* aes_aleph_keyschedule-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file     aes_aleph_keyschedule-asm.S
+ * \email    bg@nerilex.org
+ * \author   Daniel Otte 
+ * \date     2009-01-09
+ * \license  GPLv3 or later
+ * 
+ */
+
+#include "avr-asm-macros.S"
+
+.global aes256_init
+aes256_init: /* in: r24:r25 and r22:r23; continues in aes_init with keysize_b = 256 */
+	movw r20, r22 /* incoming r23:r22 becomes aes_init's ctx argument (r21:r20) */
+	ldi r22, lo8(256)
+	ldi r23, hi8(256) /* r23:r22 = key size in bits */
+	rjmp aes_init
+
+.global aes192_init
+aes192_init: /* in: r24:r25 and r22:r23; continues in aes_init with keysize_b = 192 */
+	movw r20, r22 /* incoming r23:r22 becomes aes_init's ctx argument (r21:r20) */
+	ldi r22, lo8(192)
+	ldi r23, hi8(192) /* r23:r22 = key size in bits */
+	rjmp aes_init
+
+.global aes128_init
+aes128_init: /* in: r24:r25 and r22:r23; continues in aes_init with keysize_b = 128 */
+	movw r20, r22 /* incoming r23:r22 becomes aes_init's ctx argument (r21:r20) */
+	ldi r22, 128
+	clr r23 /* r23:r22 = 128; no jump: execution falls through into aes_init */
+
+/* 
+void aes_init(const void *key, uint16_t keysize_b, aes_genctx_t *ctx){
+	uint8_t hi,i,nk, next_nk;
+	uint8_t rc=1;
+	uint8_t tmp[4];
+	nk=keysize_b>>5; / * 4, 6, 8 * /
+	hi=4*(nk+6+1);
+	memcpy(ctx, key, keysize_b/8);
+	next_nk = nk;
+	for(i=nk;i<hi;++i){
+		*((uint32_t*)tmp) = ((uint32_t*)(ctx->key[0].ks))[i-1];
+		if(i!=next_nk){
+			if(nk==8 && i%8==4){
+				tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
+				tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
+				tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
+				tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
+			}
+		} else {
+			next_nk += nk;
+			aes_rotword(tmp);
+			tmp[0] = pgm_read_byte(aes_sbox+tmp[0]);
+			tmp[1] = pgm_read_byte(aes_sbox+tmp[1]);
+			tmp[2] = pgm_read_byte(aes_sbox+tmp[2]);
+			tmp[3] = pgm_read_byte(aes_sbox+tmp[3]);
+			tmp[0] ^= rc;
+			rc = (rc & 0x80) ? 0x1b : rc << 1;
+		}
+		((uint32_t*)(ctx->key[0].ks))[i] = ((uint32_t*)(ctx->key[0].ks))[i-nk]
+		                                   ^ *((uint32_t*)tmp);
+	}
+}
+*/
+
+SBOX_SAVE0 = 14 /* not referenced by any instruction in this file */
+SBOX_SAVE1 = 15 /* not referenced by any instruction in this file */
+XRC = 17 /* round constant: XORed into T0, then doubled (0x1b after overflow) */
+NK = 22 /* key length in 32-bit words (holds keysize_b/8 before that) */
+C1 = 18 /* byte counter for the key copy, afterwards index of the word being generated */
+NEXT_NK = 19 /* next word index that gets the substitute/rotate/round-constant treatment */
+HI = 23 /* word index at which the expansion stops: 4*(NK+7) */
+T0 = 20 /* T0..T3: working copy of the previously generated word */
+T1 = 21
+T2 = 24
+T3 = 25
+/*
+ * aes_init: expands a 128-, 192- or 256-bit key into the round keys at ctx.
+ * The key itself is copied to the start of ctx. Every further 32-bit word i
+ * is word[i-NK] ^ tmp, with tmp derived from word[i-1], until 4*(NK+7) words
+ * exist (NK = key length in words); see the C reference further up.
+ * aes_sbox is addressed through its high address byte only.
+ *
+ * param key:       r24:r25
+ * param keysize_b: r22:r23 (key size in bits)
+ * param ctx:       r20:r21
+ *
+ * Changes r0, r18-r27, r30, r31 and the T flag; r16, r17 and Y are saved.
+ */
+.global aes_init
+aes_init:
+	push_range 16, 17 /* r16 (scratch) and r17 (XRC) are the only saved registers used besides Y */
+	push r28
+	push r29
+	movw r30, r20 /* Z = ctx: write pointer */
+	movw r28, r20 /* Y = ctx: will read word[i-NK] */
+	movw r26, r24 /* X = key */
+	lsr r23
+	ror r22
+	lsr r22
+	lsr r22 /* r22 contains keysize_b/8 */
+	mov C1, r22
+
+1:	/* copy key to ctx */
+	ld r0, X+
+	st Z+, r0
+	dec C1
+	brne 1b
+
+	lsr NK
+	lsr NK
+	/* NK is now the number of 32-bit words in the supplied key */
+	bst NK, 3 /* set T if NK==8 */
+	mov NEXT_NK, NK
+	mov HI, NK
+	subi HI, -7 /* HI += 7 */
+	lsl HI
+	lsl HI /* HI = 4*(NK+7): total number of words in ctx */
+	movw r26, r30
+	sbiw r26, 4 /* X = last word of the copied key, i.e. word[i-1] */
+	mov C1, NK /* C1 = i, index of the word to generate next */
+	ldi XRC, 1
+1:	/* T0..T3 = word[i-1]; X ends up at word[i] */
+	ld T0, X+
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	cp NEXT_NK, C1
+	breq 2f /* i is a multiple of NK: full transformation */
+	brtc 5f /* keys shorter than 8 words: plain XOR for all other words */
+	mov r16, C1
+	andi r16, 0x07
+	cpi r16, 0x04
+	brne 5f
+	rcall substitute /* NK==8 and i%8==4: S-box only */
+	rjmp 5f
+2:
+	add NEXT_NK, NK
+	rcall substitute
+	mov r16, T0 /* rotate the substituted word by one byte */
+	mov T0, T1
+	mov T1, T2
+	mov T2, T3
+	mov T3, r16
+	eor T0, XRC
+	lsl XRC /* next round constant ... */
+	brcc 3f
+	ldi XRC, 0x1b /* ... which is 0x1b once 0x80 has been shifted out */
+3:
+5:
+	movw r30, r26 /* Z = word[i]; substitute may have changed Z */
+
+	ld r0, Y+ /* word[i] = word[i-NK] ^ T0..T3 */
+	eor r0, T0
+	st Z+, r0
+	ld r0, Y+
+	eor r0, T1
+	st Z+, r0
+	ld r0, Y+
+	eor r0, T2
+	st Z+, r0
+	ld r0, Y+
+	eor r0, T3
+	st Z+, r0
+
+	/* X still points at word[i], which is word[i-1] for the next pass */
+	inc C1
+	cp C1, HI
+	brne 1b /* loop start is 40 words back, within relative branch range */
+
+	clt
+	pop r29
+	pop r28
+	pop_range 16, 17
+	ret
+	
+.macro aes_ks_sbox reg /* replaces reg by the aes_sbox entry it indexes; r31 must hold hi8(aes_sbox) */
+	mov r30, \reg
+	lpm \reg, Z
+.endm
+substitute: /* passes T0..T3 through aes_sbox; changes r30 and r31 */
+	ldi r31, hi8(aes_sbox) /* the low address byte is supplied by the byte being looked up */
+	aes_ks_sbox T0
+	aes_ks_sbox T1
+	aes_ks_sbox T2
+	aes_ks_sbox T3
+	ret
+	
+