From: bg <bg@b1d182e4-1ff8-0310-901f-bddb46175740>
Date: Thu, 6 Aug 2009 20:11:02 +0000 (+0000)
Subject: modified structure
X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=commitdiff_plain;h=d32eba56ce10ea6b9eff123b50d9842673b38f2b

modified structure
---

diff --git a/arcfour-asm.S b/arcfour-asm.S
deleted file mode 100644
index 1ef8218..0000000
--- a/arcfour-asm.S
+++ /dev/null
@@ -1,133 +0,0 @@
-/* arcfour-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/* 
- * File:        arcfour-asm.S
- * Author:      Daniel Otte
- * Date:        2006-07-06
- * License:     GPLv3 or later
- * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
- * 
- */
- 
-#include <avr/io.h>
-#include "avr-asm-macros.S"
- 
- /* +---+---+---------------------+
- *  | i | j | ......<256>........ |
- *  +---+---+---------------------+
- */
- 
-.global arcfour_init
-/*
- *== arcfour_init ==
- *  this function initialises the context
- * param1: 16-bit pointer to the key
- *	given in r24:r25
- * param2: 8-bit integer indicating keylength in byte
- *	given in r22
- * param3: 16-bit pointer to a ctx struct
- *	given in r20:r21
- */
-arcfour_init:
-	push_ r28, r29
-	movw r26, r20   /* X points to ctx */
-	movw r30, r24   /* Z points to key */
-	st X+, r1
-	st X+, r1       /* X points to S */
-	movw r20, r26   /* store pointer to S in r21:r20 */
-	
-1:		
-	st X+, r1 
-	inc r1
-	brne 1b
-	
-	movw r26, r20
-	add r22, r30         /* r18 is keyindex counter */
-	clr r0
-	clr r19
-2:
-	ld r23, X
-	ld r18, Z+
-	add r19, r18
-	add r19, r23
-	movw r28, r20   /* load pointer to S in Y */
-	add r28, r19
-	adc r29, r1
-	ld r18, Y
-	st Y,  r23
-	st X+, r18
-	cp r30, r22
-	brne 3f
-	movw r30, r24
-3:		
-	inc r0
-	brne 2b	
-	pop_ r29, r28
-	ret
-
-/*
-uint8_t arcfour_gen(arcfour_ctx_t *c){
-	uint8_t t;
-	c->i++;
-	c->j += c->s[c->i];
-	t = c->s[c->j];
-	c->s[c->j] = c->s[c->i];
-	c->s[c->i] = t;
-	return c->s[(c->s[c->j] + c->s[c->i]) & 0xff];
-}
-*/
-.global arcfour_gen
-
-;== arcfour_gen ==
-;  this function initialises the context
-; param1: 16-bit pointer to a ctx struct
-;	given in r25,r24
-
-arcfour_gen:
-	movw r26, r24
-	ld r18, X
-	inc r18
-	st X+, r18
-	movw r30, r26
-	ld r19, X+
-	add r26, r18
-	adc r27, r1
-	ld r20, X
-	add r19, r20
-	st Z+, r19		/* i,j loaded&saved; X->S[i]; Z->S[0]; r20=S[i] */
-	add r30, r19
-	adc r31, r1
-	ld r21, Z		/* X->S[i]; Z->S[j]; r20=S[i]; r21=S[j]*/
-	st Z, r20
-	st X, r21
-	add r20, r21
-	adiw r24, 2
-	movw r26, r24 /* X and Z point to S */
-	add r26, r20
-	adc r27, r1
-	ld r24, X
-	clr r25
-	ret
-
-
-
-
-
-
diff --git a/arcfour.c b/arcfour.c
deleted file mode 100644
index e07193f..0000000
--- a/arcfour.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* arcfour.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * File:        arcfour.c
- * Author:      Daniel Otte
- * email:       daniel.otte@rub.de
- * Date:        2006-06-07
- * License:     GPLv3 or later
- * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
- *
- */
-
-#include <stdint.h>
-#include "arcfour.h"
-
-/*
- * length is length of key in bytes!
- */
-
-void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx){
-	uint8_t t;
-	uint16_t x,y=0;
-	for(x=0; x<= 255; ++x)
-		ctx->s[x]=x;
-
-	for(x=0; x<= 255; ++x){
-		y += ctx->s[x] + ((uint8_t*)key)[x % length_B];
-		y &= 0xff;
-		/* ctx->s[y] <--> ctx->s[x] */
-		t = ctx->s[y];
-		ctx->s[y] = ctx->s[x];
-		ctx->s[x] = t;
-	}
-	ctx->i = ctx->j = 0;
-}
-
-uint8_t arcfour_gen(arcfour_ctx_t *ctx){
-	uint8_t t;
-	ctx->i++;
-	ctx->j += ctx->s[ctx->i];
-	/* ctx->s[i] <--> ctx->s[j] */
-	t = ctx->s[ctx->j];
-	ctx->s[ctx->j] = ctx->s[ctx->i];
-	ctx->s[ctx->i] = t;
-	return ctx->s[(ctx->s[ctx->j] + ctx->s[ctx->i]) & 0xff];
-}
-
diff --git a/arcfour.h b/arcfour.h
deleted file mode 100644
index d332d7f..0000000
--- a/arcfour.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* arcfour.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * File:	arcfour.h
- * Author:	Daniel Otte
- * Date: 	2006-06-07
- * License: GPLv3+
- * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
- */
-
-/**
- * \file	arcfour.h
- * \author	Daniel Otte
- * \date 	2006-06-07
- * \license GPLv3+
- * \brief Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
- *
- * This header file defines the interface of the ARCFOUR cipher implementation.
- *
- * This implementation aims to be compatible with the ARCFOUR description
- * available at
- * http://www.mozilla.org/projects/security/pki/nss/draft-kaukonen-cipher-arcfour-03.txt
- */
-
-
-#ifndef ARCFOUR_H_
-#define ARCFOUR_H_
-
-#include <stdint.h>
-
-/** \typedef arcfour_ctx_t
- * \brief type for arcfour context
- *
- * A variable of this type may contain a complete ARCFOUR context.
- * The context is used to store the state of the cipher and gets
- * created by the arcfour_init(arcfour_ctx_t *c, uint8_t *key, uint8_t length_B)
- * function. The context is of the fixed size of 258 bytes
- */
-
-/** \struct arcfour_ctx_st
- * \brief base for ::arcfour_ctx_t
- *
- * The struct holds the two indices and the S-Box
- */
-typedef struct arcfour_ctx_st {
-	uint8_t i,j;
-	uint8_t s[256];
-} arcfour_ctx_t;
-
-
-/** \fn void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx)
- * \brief setup a context with a key
- *
- * This function sets up a ::arcfour_ctx_t context using
- * the supplied key of the given length.
- * \param ctx pointer to the context
- * \param key pointer to the key
- * \param length_B length of the key in bytes (between 1 and 255)
- */
-
-void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx);
-
-/** \fn uint8_t arcfour_gen(arcfour_ctx_t *ctx)
- * \brief generates a byte of keystream
- *
- * This function generates the next byte of keystream
- * from the supplied ::arcfour_ctx_t context which is updated accordingly
- *
- * \param ctx pointer to the context
- * \return byte of keystream
- */
-
-uint8_t arcfour_gen(arcfour_ctx_t *ctx);
-
-#endif
diff --git a/arcfour/arcfour-asm.S b/arcfour/arcfour-asm.S
new file mode 100644
index 0000000..1ef8218
--- /dev/null
+++ b/arcfour/arcfour-asm.S
@@ -0,0 +1,133 @@
+/* arcfour-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* 
+ * File:        arcfour-asm.S
+ * Author:      Daniel Otte
+ * Date:        2006-07-06
+ * License:     GPLv3 or later
+ * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
+ * 
+ */
+ 
+#include <avr/io.h>
+#include "avr-asm-macros.S"
+ 
+ /* +---+---+---------------------+
+ *  | i | j | ......<256>........ |
+ *  +---+---+---------------------+
+ */
+ 
+.global arcfour_init
+/*
+ *== arcfour_init ==
+ *  this function initialises the context
+ * param1: 16-bit pointer to the key
+ *	given in r24:r25
+ * param2: 8-bit integer indicating keylength in byte
+ *	given in r22
+ * param3: 16-bit pointer to a ctx struct
+ *	given in r20:r21
+ */
+arcfour_init:
+	push_ r28, r29  /* macro from avr-asm-macros.S (not shown); presumably saves Y, which is overwritten below */
+	movw r26, r20   /* X points to ctx */
+	movw r30, r24   /* Z points to key */
+	st X+, r1       /* ctx->i = 0 (assumes r1 == 0 on entry, the avr-gcc zero register) */
+	st X+, r1       /* ctx->j = 0; X points to S */
+	movw r20, r26   /* store pointer to S in r21:r20 */
+	
+1:		/* fill S with the identity permutation: S[k] = k */
+	st X+, r1 
+	inc r1          /* r1 doubles as the counter; it wraps back to 0 after 256 stores */
+	brne 1b
+	
+	movw r26, r20   /* X points to S[0] again */
+	add r22, r30    /* r22 = low byte of (key address + key length), used as end-of-key marker */
+	clr r0          /* r0 counts the 256 mixing rounds */
+	clr r19         /* r19 = j = 0 */
+2:
+	ld r23, X       /* r23 = S[i] */
+	ld r18, Z+      /* r18 = next key byte */
+	add r19, r18
+	add r19, r23    /* j = (j + key byte + S[i]) mod 256 */
+	movw r28, r20   /* load pointer to S in Y */
+	add r28, r19
+	adc r29, r1     /* Y points to S[j] */
+	ld r18, Y       /* r18 = S[j] */
+	st Y,  r23      /* S[j] = old S[i] */
+	st X+, r18      /* S[i] = old S[j]; X advances to S[i+1] */
+	cp r30, r22     /* end of key reached? only the low address bytes are compared */
+	brne 3f
+	movw r30, r24   /* yes: wrap Z back to the first key byte */
+3:		
+	inc r0
+	brne 2b	        /* repeat until r0 wraps to 0, i.e. 256 rounds */
+	pop_ r29, r28   /* counterpart of the push_ at function entry */
+	ret
+
+/*
+uint8_t arcfour_gen(arcfour_ctx_t *c){
+	uint8_t t;
+	c->i++;
+	c->j += c->s[c->i];
+	t = c->s[c->j];
+	c->s[c->j] = c->s[c->i];
+	c->s[c->i] = t;
+	return c->s[(c->s[c->j] + c->s[c->i]) & 0xff];
+}
+*/
+.global arcfour_gen
+
+;== arcfour_gen ==
+;  this function generates one byte of keystream (returned in r24, r25 cleared)
+; param1: 16-bit pointer to a ctx struct
+;	given in r25,r24
+
+arcfour_gen:
+	movw r26, r24   /* X points to ctx, i.e. to ctx->i */
+	ld r18, X       /* r18 = i */
+	inc r18         /* i = (i + 1) mod 256 */
+	st X+, r18      /* store i; X points to ctx->j */
+	movw r30, r26   /* Z points to ctx->j */
+	ld r19, X+      /* r19 = j; X points to S[0] */
+	add r26, r18
+	adc r27, r1     /* X points to S[i] (assumes r1 == 0, the avr-gcc zero register) */
+	ld r20, X       /* r20 = S[i] */
+	add r19, r20    /* j = (j + S[i]) mod 256 */
+	st Z+, r19		/* i,j loaded&saved; X->S[i]; Z->S[0]; r20=S[i] */
+	add r30, r19
+	adc r31, r1     /* Z points to S[j] */
+	ld r21, Z		/* X->S[i]; Z->S[j]; r20=S[i]; r21=S[j]*/
+	st Z, r20       /* S[j] = old S[i] */
+	st X, r21       /* S[i] = old S[j] */
+	add r20, r21    /* r20 = (S[i] + S[j]) mod 256 */
+	adiw r24, 2     /* r25:r24 = address of S[0] (skips i and j) */
+	movw r26, r24 /* X points to S[0]; Z is left pointing to S[j] */
+	add r26, r20
+	adc r27, r1     /* X points to S[(S[i] + S[j]) mod 256] */
+	ld r24, X       /* return value: the keystream byte */
+	clr r25         /* high byte of the return register pair is zeroed */
+	ret
+
+
+
+
+
+
diff --git a/arcfour/arcfour.c b/arcfour/arcfour.c
new file mode 100644
index 0000000..e07193f
--- /dev/null
+++ b/arcfour/arcfour.c
@@ -0,0 +1,63 @@
+/* arcfour.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * File:        arcfour.c
+ * Author:      Daniel Otte
+ * email:       daniel.otte@rub.de
+ * Date:        2006-06-07
+ * License:     GPLv3 or later
+ * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
+ *
+ */
+
+#include <stdint.h>
+#include "arcfour.h"
+
+/*
+ * length is length of key in bytes!
+ */
+
+void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx){
+	/* RC4 key schedule: identity-fill S, then permute it under the key. */
+	uint16_t pos, mix=0;
+	uint8_t held;
+	for(pos=0; pos<256; ++pos){
+		ctx->s[pos]=pos;
+	}
+	for(pos=0; pos<256; ++pos){
+		/* mix = (mix + S[pos] + key[pos mod length_B]) mod 256 */
+		mix = (mix + ctx->s[pos] + ((const uint8_t*)key)[pos % length_B]) & 0xff;
+		held = ctx->s[pos]; /* exchange S[pos] and S[mix] */
+		ctx->s[pos] = ctx->s[mix];
+		ctx->s[mix] = held;
+	}
+	ctx->j = ctx->i = 0;
+}
+
+uint8_t arcfour_gen(arcfour_ctx_t *ctx){
+	/* One generator step: advance i, fold S[i] into j, swap, emit S[S[i]+S[j]]. */
+	uint8_t *sbox = ctx->s, at_i, at_j;
+	at_i = sbox[++ctx->i];
+	ctx->j += at_i;
+	at_j = sbox[ctx->j];
+	sbox[ctx->j] = at_i;
+	sbox[ctx->i] = at_j;
+	return sbox[(uint8_t)(at_i + at_j)];
+}
+
diff --git a/arcfour/arcfour.h b/arcfour/arcfour.h
new file mode 100644
index 0000000..d332d7f
--- /dev/null
+++ b/arcfour/arcfour.h
@@ -0,0 +1,91 @@
+/* arcfour.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * File:	arcfour.h
+ * Author:	Daniel Otte
+ * Date: 	2006-06-07
+ * License: GPLv3+
+ * Description: Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
+ */
+
+/**
+ * \file	arcfour.h
+ * \author	Daniel Otte
+ * \date 	2006-06-07
+ * \license GPLv3+
+ * \brief Implementation of the ARCFOUR (RC4 compatible) stream cipher algorithm.
+ *
+ * This header file defines the interface of the ARCFOUR cipher implementation.
+ *
+ * This implementation aims to be compatible with the ARCFOUR description
+ * available at
+ * http://www.mozilla.org/projects/security/pki/nss/draft-kaukonen-cipher-arcfour-03.txt
+ */
+
+
+#ifndef ARCFOUR_H_
+#define ARCFOUR_H_
+
+#include <stdint.h>
+
+/** \typedef arcfour_ctx_t
+ * \brief type for arcfour context
+ *
+ * A variable of this type may contain a complete ARCFOUR context.
+ * The context is used to store the state of the cipher and gets
+ * created by the arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx)
+ * function. The context is of the fixed size of 258 bytes
+ */
+
+/** \struct arcfour_ctx_st
+ * \brief base for ::arcfour_ctx_t
+ *
+ * The struct holds the two indices and the S-Box
+ */
+typedef struct arcfour_ctx_st {
+	uint8_t i,j;      /* the two indices into the S-Box */
+	uint8_t s[256];   /* the S-Box */
+} arcfour_ctx_t;
+
+
+/** \fn void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx)
+ * \brief setup a context with a key
+ *
+ * This function sets up a ::arcfour_ctx_t context using
+ * the supplied key of the given length.
+ * \param ctx pointer to the context
+ * \param key pointer to the key
+ * \param length_B length of the key in bytes (between 1 and 255)
+ */
+
+void arcfour_init(const void *key, uint8_t length_B, arcfour_ctx_t *ctx);
+
+/** \fn uint8_t arcfour_gen(arcfour_ctx_t *ctx)
+ * \brief generates a byte of keystream
+ *
+ * This function generates the next byte of keystream
+ * from the supplied ::arcfour_ctx_t context which is updated accordingly
+ *
+ * \param ctx pointer to the context
+ * \return byte of keystream
+ */
+
+uint8_t arcfour_gen(arcfour_ctx_t *ctx);
+
+#endif
diff --git a/base64/base64_dec.c b/base64/base64_dec.c
new file mode 100644
index 0000000..f057f54
--- /dev/null
+++ b/base64/base64_dec.c
@@ -0,0 +1,246 @@
+/* base64_dec.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 decoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_dec.h"
+
+#include "test_src/cli.h"
+
+/*
+ #define USE_GCC_EXTENSION
+*/
+#if 1
+
+#ifdef USE_GCC_EXTENSION
+
+static
+int ascii2bit6(char a){ /* maps one base64 character to its 6-bit value; -1 if a is not in the alphabet */
+	switch(a){
+		case 'A'...'Z': /* case ranges are the GCC extension this variant is named after */
+			return a-'A';
+		case 'a'...'z':
+			return a-'a'+26;
+		case '0'...'9':
+			return a-'0'+52;
+		case '+':
+		case '-': /* accepted as an alternative spelling of '+' */
+			return 62;
+		case '/':
+		case '_': /* accepted as an alternative spelling of '/' */
+			return 63;
+		default:
+			return -1;
+	}
+}
+
+#else
+
+/* Maps one base64 character ('+'/'/' or '-'/'_' for 62/63) to its 6-bit value; 0xff if invalid. */
+static
+uint8_t ascii2bit6(char a){
+	int r;
+	switch(a>>4){ /* dispatch on the high nibble: 0x4/0x5 upper case, 0x6/0x7 lower case, 0x3 digits */
+		case 0x5:
+		case 0x4: 
+			r=a-'A';
+			if(r<0 || r>25){
+				break; /* not a letter, but '_' (0x5f) lives here and is handled by the switch below */
+			} else {
+				return r;
+			}
+		case 0x7:
+		case 0x6: 
+			r=a-'a';
+			if(r<0 || r>25){
+				return 0xff;
+			} else {
+				return r+26;
+			}
+		case 0x3:
+			if(a>'9')
+				return 0xff;
+			return a-'0'+52;
+		default:
+			break;	
+	}
+	switch (a){
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return 0xff;
+	}
+}
+
+#endif
+
+#else
+
+static 
+uint8_t ascii2bit6(uint8_t a){
+	/* Plain range checks with early returns; 0xff marks a character
+	 * that is not part of the accepted alphabet. */
+	if('A'<=a && a<='Z'){
+		return a-'A';       /*  0..25 */
+	}
+	if('a'<=a && a<='z'){
+		return a-'a'+26;    /* 26..51 */
+	}
+	if('0'<=a && a<='9'){
+		return a-'0'+52;    /* 52..61 */
+	}
+	switch(a){
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return 0xff;
+	}
+}
+
+#endif
+
+/* Computes the decoded length in bytes of the base64 text in str; returns -1 if it is malformed.
+ * CR/LF are always skipped; other non-alphabet characters are skipped only when strict is 0. */
+int base64_binlength(char* str, uint8_t strict){
+	int l=0;        /* number of alphabet characters seen */
+	uint8_t term=0; /* number of '=' padding characters seen */
+	while(*str!='\0'){
+		if(*str=='\n' || *str=='\r'){
+			str++;
+			continue;
+		}
+		if(*str=='='){
+			term++;
+			str++;
+			if(term==2){
+				break;
+			}
+			continue;
+		}
+		if(term)
+			return -1; /* data after a padding character */
+		if((uint8_t)ascii2bit6(*str)==0xFF){ /* compare as uint8_t: a uint8_t 0xff never equals -1 */
+			if(strict)
+				return -1;
+		} else {
+			l++;
+		}
+		str++;
+	}
+	switch(term){
+		case 0:
+			if(l%4!=0)
+				return -1;
+			return l/4*3;
+		case 1:
+			if(l%4!=3)
+				return -1;
+			return (l+1)/4*3-1;
+		case 2:
+			if(l%4!=2)
+				return -1;
+			return (l+2)/4*3-2;
+		default:
+			return -1;
+	}
+}
+
+/*
+  |543210543210543210543210|
+  |765432107654321076543210|
+
+        .      .      .     .
+  |54321054|32105432|10543210|
+  |76543210|76543210|76543210|
+
+*/
+
+/* Decodes the base64 text in b64str and writes the resulting bytes to dest.
+ * Returns 0 on success and -1 on malformed input. CR/LF are always skipped;
+ * other characters outside the alphabet are skipped only when strict is 0.
+ * dest is not bounds-checked here; the caller must provide enough room. */
+int base64dec(void* dest, char* b64str, uint8_t strict){
+	uint8_t buffer[4]; /* 6-bit values of the current group of four characters */
+	uint8_t idx=0;     /* number of values collected in buffer */
+	uint8_t term=0;    /* number of '=' padding characters seen */
+	for(;;){
+		buffer[idx]= ascii2bit6(*b64str);
+		/* 0xFF marks anything that is not an alphabet character */
+		if(buffer[idx]==0xFF){
+			if(*b64str=='='){
+				term++;
+				b64str++;
+				if(term==2)
+					goto finalize; /* definitely the end */
+			}else{
+				if(*b64str == '\0'){
+					goto finalize; /* definitely the end */
+				}else{
+					if(*b64str == '\r' || *b64str == '\n' || !(strict)){
+						b64str++; /* characters that we simply ignore */
+					}else{
+						return -1;
+					}
+				}
+			}
+		}else{
+			if(term)
+				return -1; /* this happens if we get a '=' in the stream */
+			idx++;
+			b64str++;
+		}
+		if(idx==4){
+			((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+			((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+			((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3];
+			dest = (uint8_t*)dest +3;
+			idx=0;
+		}
+	}
+  finalize:	
+	/* the final touch: a partial group is only valid with matching padding */
+	if(idx==0)
+		return 0;
+	if(term==1 && idx==3){ /* three characters + "=" encode two bytes */
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+		return 0;
+	}
+	if(term==2 && idx==2){ /* two characters + "==" encode one byte */
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		return 0;
+	}
+	return -1; /* truncated group, or padding that does not match it */
+}
diff --git a/base64/base64_dec.h b/base64/base64_dec.h
new file mode 100644
index 0000000..39beff8
--- /dev/null
+++ b/base64/base64_dec.h
@@ -0,0 +1,29 @@
+/* base64_dec.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_DEC_H_
+#define BASE64_DEC_H_
+
+#include <stdint.h>
+
+int base64_binlength(char* str, uint8_t strict);
+int base64dec(void* dest, char* b64str, uint8_t strict);
+
+#endif /*BASE64_DEC_H_*/
diff --git a/base64/base64_enc.c b/base64/base64_enc.c
new file mode 100644
index 0000000..400f25c
--- /dev/null
+++ b/base64/base64_enc.c
@@ -0,0 +1,117 @@
+/* base64_enc.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 encoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_enc.h"
+
+#if 1
+#include <avr/pgmspace.h>
+
+char base64_alphabet[64] PROGMEM = { /* output character for each 6-bit value; read back with pgm_read_byte() */
+	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
+	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
+	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
+	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
+	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
+	'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
+	'w', 'x', 'y', 'z', '0', '1', '2', '3', 
+	'4', '5', '6', '7', '8', '9', '+', '/' }; 
+
+static 
+char bit6toAscii(uint8_t a){
+	/* only the low six bits of a select the entry of the alphabet table */
+	return pgm_read_byte(&base64_alphabet[a & (uint8_t)0x3F]);
+}
+
+#else
+
+static 
+char bit6toAscii(uint8_t a){
+	/* Arithmetic variant without a lookup table: after masking to six
+	 * bits, each range of the value maps onto one contiguous run of
+	 * ASCII characters; 62 and 63 are handled individually. */
+	a &= (uint8_t)0x3F;
+	if(a<26){
+		return 'A'+a;        /*  0..25 -> 'A'..'Z' */
+	}
+	if(a<52){
+		return 'a'+(a-26);   /* 26..51 -> 'a'..'z' */
+	}
+	if(a<62){
+		return '0'+(a-52);   /* 52..61 -> '0'..'9' */
+	}
+	switch(a){
+		case 62:
+			return '+';
+		default:
+			return '/';          /* a == 63 */
+	}
+}
+
+#endif
+
+void base64enc(char* dest, void* src, uint16_t length){
+	/* Encodes length bytes from src as NUL-terminated base64 text in dest;
+	 * an incomplete final group is padded with '='. */
+	const uint8_t *in = (const uint8_t*)src;
+	uint16_t groups = length/3;
+	uint8_t sextet[4];
+	uint8_t k;
+	/* every complete 3-byte group yields four characters */
+	while(groups--){
+		sextet[0] = in[0]>>2;
+		sextet[1] = ((in[0]<<4) | (in[1]>>4)) & 0x3F;
+		sextet[2] = ((in[1]<<2) | (in[2]>>6)) & 0x3F;
+		sextet[3] = in[2] & 0x3F;
+		for(k=0; k<4; ++k){
+			*dest++ = bit6toAscii(sextet[k]);
+		}
+		in += 3;
+	}
+	/* one or two leftover bytes form a padded final group */
+	if(length%3 == 1){
+		sextet[0] = in[0]>>2;
+		sextet[1] = (in[0]<<4) & 0x3F;
+		*dest++ = bit6toAscii(sextet[0]);
+		*dest++ = bit6toAscii(sextet[1]);
+		*dest++ = '=';
+		*dest++ = '=';
+	}
+	if(length%3 == 2){
+		sextet[0] = in[0]>>2;
+		sextet[1] = ((in[0]<<4) | (in[1]>>4)) & 0x3F;
+		sextet[2] = (in[1]<<2) & 0x3F;
+		*dest++ = bit6toAscii(sextet[0]);
+		*dest++ = bit6toAscii(sextet[1]);
+		*dest++ = bit6toAscii(sextet[2]);
+		*dest++ = '=';
+	}
+	/* terminate the output string */
+	*dest='\0';
+}
+
diff --git a/base64/base64_enc.h b/base64/base64_enc.h
new file mode 100644
index 0000000..9065132
--- /dev/null
+++ b/base64/base64_enc.h
@@ -0,0 +1,28 @@
+/* base64_enc.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_ENC_H_
+#define BASE64_ENC_H_
+
+#include <stdint.h>
+
+void base64enc(char* dest, void* src, uint16_t length);
+
+#endif /*BASE64_ENC_H_*/
diff --git a/base64_dec.c b/base64_dec.c
deleted file mode 100644
index f057f54..0000000
--- a/base64_dec.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/* base64_dec.c */
-/*
- *   This file is part of the AVR-Crypto-Lib.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-/**
- * base64 decoder (RFC3548)
- * Author: Daniel Otte
- * License: GPLv3
- * 
- * 
- */
-
-#include <stdint.h>
-#include "base64_dec.h"
-
-#include "test_src/cli.h"
-
-/*
- #define USE_GCC_EXTENSION
-*/
-#if 1
-
-#ifdef USE_GCC_EXTENSION
-
-static
-int ascii2bit6(char a){
-	switch(a){
-		case 'A'...'Z':
-			return a-'A';
-		case 'a'...'z':
-			return a-'a'+26;
-		case '0'...'9':
-			return a-'0'+52;
-		case '+':
-		case '-':
-			return 62;
-		case '/':
-		case '_':
-			return 63;
-		default:
-			return -1;
-	}
-}
-
-#else
-
-static
-uint8_t ascii2bit6(char a){
-	int r;
-	switch(a>>4){
-		case 0x5:
-		case 0x4: 
-			r=a-'A';
-			if(r<0 || r>25){
-				return -1;
-			} else {
-				return r;
-			}
-		case 0x7:
-		case 0x6: 
-			r=a-'a';
-			if(r<0 || r>25){
-				return -1;
-			} else {
-				return r+26;
-			}
-			break;
-		case 0x3:
-			if(a>'9')
-				return -1;
-			return a-'0'+52;
-		default:
-			break;	
-	}
-	switch (a){
-		case '+':
-		case '-':
-			return 62;
-		case '/':
-		case '_':
-			return 63;
-		default:
-			return 0xff;
-	}
-}
-
-#endif
-
-#else
-
-static 
-uint8_t ascii2bit6(uint8_t a){
-	if(a>='A' && a<='Z'){
-		return a-'A';
-	} else {
-		if(a>='a' && a<= 'z'){
-			return a-'a'+26;
-		} else {
-			if(a>='0' && a<='9'){
-				return a-'0'+52;
-			} else {
-				if(a=='+' || a=='-'){
-					return 62;
-				} else {
-					if(a=='/' || a=='_'){
-						return 63;
-					} else {
-						return 0xff;
-					}
-				}
-			}
-		}
-	}
-}
-
-#endif
-
-int base64_binlength(char* str, uint8_t strict){
-	int l=0;
-	uint8_t term=0;
-	for(;;){
-		if(*str=='\0')
-			break;
-		if(*str=='\n' || *str=='\r'){
-			str++;
-			continue;
-		}
-		if(*str=='='){
-			term++;
-			str++;
-			if(term==2){
-				break;
-			}
-			continue;
-		}
-		if(term)
-			return -1;
-		if(ascii2bit6(*str)==-1){
-			if(strict)
-				return -1;
-		} else {
-			l++;
-		}
-		str++;
-	}
-	switch(term){
-		case 0:
-			if(l%4!=0)
-				return -1;
-			return l/4*3;
-		case 1:
-			if(l%4!=3)
-				return -1;
-			return (l+1)/4*3-1;
-		case 2:
-			if(l%4!=2)
-				return -1;
-			return (l+2)/4*3-2;
-		default:
-			return -1;
-	}
-}
-
-/*
-  |543210543210543210543210|
-  |765432107654321076543210|
-
-        .      .      .     .
-  |54321054|32105432|10543210|
-  |76543210|76543210|76543210|
-
-*/
-
-int base64dec(void* dest, char* b64str, uint8_t strict){
-	uint8_t buffer[4];
-	uint8_t idx=0;
-	uint8_t term=0;
-	for(;;){
-//		cli_putstr_P(PSTR("\r\n  DBG: got 0x"));
-//		cli_hexdump(b64str, 1);
-		buffer[idx]= ascii2bit6(*b64str);
-//		cli_putstr_P(PSTR(" --> 0x"));
-//		cli_hexdump(buffer+idx, 1);
-		
-		if(buffer[idx]==0xFF){
-			if(*b64str=='='){
-				term++;
-				b64str++;
-				if(term==2)
-					goto finalize; /* definitly the end */
-			}else{
-				if(*b64str == '\0'){
-					goto finalize; /* definitly the end */
-				}else{
-					if(*b64str == '\r' || *b64str == '\n' || !(strict)){
-						b64str++; /* charcters that we simply ignore */
-					}else{
-						return -1;
-					}
-				}
-			}
-		}else{
-			if(term)
-				return -1; /* this happens if we get a '=' in the stream */
-			idx++;
-			b64str++;
-		}
-		if(idx==4){
-			((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
-			((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
-			((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3];
-			dest = (uint8_t*)dest +3;
-			idx=0;
-		}
-	}
-  finalize:	
-	/* the final touch */
-	if(idx==0)
-		return 0;
-	if(term==1){
-		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
-		((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;			
-		return 0;
-	}
-	if(term==2){
-		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
-		return 0;
-	}
-	return -1;
-}
diff --git a/base64_dec.h b/base64_dec.h
deleted file mode 100644
index 39beff8..0000000
--- a/base64_dec.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* base64_dec.h */
-/*
- *   This file is part of the AVR-Crypto-Lib.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#ifndef BASE64_DEC_H_
-#define BASE64_DEC_H_
-
-#include <stdint.h>
-
-int base64_binlength(char* str, uint8_t strict);
-int base64dec(void* dest, char* b64str, uint8_t strict);
-
-#endif /*BASE64_DEC_H_*/
diff --git a/base64_enc.c b/base64_enc.c
deleted file mode 100644
index 400f25c..0000000
--- a/base64_enc.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/* base64_enc.c */
-/*
- *   This file is part of the AVR-Crypto-Lib.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-/**
- * base64 encoder (RFC3548)
- * Author: Daniel Otte
- * License: GPLv3
- * 
- * 
- */
-
-#include <stdint.h>
-#include "base64_enc.h"
-
-#if 1
-#include <avr/pgmspace.h>
-
-char base64_alphabet[64] PROGMEM = {
-	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
-	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
-	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
-	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
-	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
-	'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
-	'w', 'x', 'y', 'z', '0', '1', '2', '3', 
-	'4', '5', '6', '7', '8', '9', '+', '/' }; 
-
-static 
-char bit6toAscii(uint8_t a){
-	a &= (uint8_t)0x3F;
-	return pgm_read_byte(base64_alphabet+a);
-}
-
-#else
-
-static 
-char bit6toAscii(uint8_t a){
-	a &= (uint8_t)0x3F;
-	
-	if(a<=25){
-		return a+'A';
-	} else {
-		if(a<=51){
-			return a-26+'a';
-		} else {
-			if(a<=61){
-				return a-52+'0';
-			} else {
-				if(a==62){
-					return '+';
-				} else {
-					return '/'; /* a == 63 */
-				}
-			}
-		}
-	}
-}
-
-#endif
-
-void base64enc(char* dest, void* src, uint16_t length){
-	uint16_t i,j;
-	uint8_t a[4];
-	for(i=0; i<length/3; ++i){
-		a[0]= (((uint8_t*)src)[i*3+0])>>2;
-		a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
-		a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F;
-		a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F;
-		for(j=0; j<4; ++j){
-			*dest++=bit6toAscii(a[j]);
-		}
-	}
-	/* now we do the rest */
-	switch(length%3){
-		case 0: 
-			break;
-		case 1:
-			a[0]=(((uint8_t*)src)[i*3+0])>>2;
-			a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F;
-			*dest++ = bit6toAscii(a[0]);
-			*dest++ = bit6toAscii(a[1]);
-			*dest++ = '=';
-			*dest++ = '=';
-			break;
-		case 2:		
-			a[0]= (((uint8_t*)src)[i*3+0])>>2;
-			a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
-			a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F;
-			*dest++ = bit6toAscii(a[0]);
-			*dest++ = bit6toAscii(a[1]);
-			*dest++ = bit6toAscii(a[2]);
-			*dest++ = '=';
-			break;
-		default: /* this will not happen! */
-			break;	
-	}
-/*  finalize: */
-  	*dest='\0';
-}
-
diff --git a/base64_enc.h b/base64_enc.h
deleted file mode 100644
index 9065132..0000000
--- a/base64_enc.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* base64_enc.h */
-/*
- *   This file is part of the AVR-Crypto-Lib.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#ifndef BASE64_ENC_H_
-#define BASE64_ENC_H_
-
-#include <stdint.h>
-
-void base64enc(char* dest, void* src, uint16_t length);
-
-#endif /*BASE64_ENC_H_*/
diff --git a/bmw/bmw_large.c b/bmw/bmw_large.c
new file mode 100644
index 0000000..10e65f6
--- /dev/null
+++ b/bmw/bmw_large.c
@@ -0,0 +1,468 @@
+/* bmw_large.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \file    bmw_large.c
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2009-04-27
+ * \license GPLv3 or later
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "bmw_large.h"
+
+#define SHL64(a,n) ((a)<<(n))
+#define SHR64(a,n) ((a)>>(n))
+#define ROTL64(a,n) (((a)<<(n))|((a)>>(64-(n))))
+#define ROTR64(a,n) (((a)>>(n))|((a)<<(64-(n))))
+
+#define BUG24 0
+#define F0_HACK 1
+#define DEBUG 0
+#if DEBUG
+ #include "cli.h"
+ 
+ void ctx_dump(const bmw_large_ctx_t* ctx){
+ 	uint8_t i;
+	cli_putstr_P(PSTR("\r\n==== ctx dump ===="));
+	for(i=0; i<16;++i){
+		cli_putstr_P(PSTR("\r\n h["));
+		cli_hexdump(&i, 1);
+		cli_putstr_P(PSTR("] = "));
+		cli_hexdump_rev(&(ctx->h[i]), 8);
+	}
+	cli_putstr_P(PSTR("\r\n counter = "));
+	cli_hexdump(&(ctx->counter), 4);
+ }
+ 
+ void dump_x(const uint64_t* q, uint8_t elements, char x){
+	uint8_t i;
+ 	cli_putstr_P(PSTR("\r\n==== "));
+	cli_putc(x);
+	cli_putstr_P(PSTR(" dump ===="));
+	for(i=0; i<elements;++i){
+		cli_putstr_P(PSTR("\r\n "));
+		cli_putc(x);
+		cli_putstr_P(PSTR("["));
+		cli_hexdump(&i, 1);
+		cli_putstr_P(PSTR("] = "));
+		cli_hexdump_rev(&(q[i]), 8);
+	}
+ }
+#else
+ #define ctx_dump(x)
+ #define dump_x(a,b,c)
+#endif
+
+uint64_t bmw_large_s0(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 1)
+		^ SHL64(x, 3)
+		^ ROTL64(x, 4)
+		^ ROTR64(x, 64-37);
+	return r;	
+}
+
+uint64_t bmw_large_s1(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 1)
+		^ SHL64(x, 2)
+		^ ROTL64(x,13)
+		^ ROTR64(x,64-43);
+	return r;	
+}
+
+uint64_t bmw_large_s2(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 2)
+		^ SHL64(x, 1)
+		^ ROTL64(x, 19)
+		^ ROTR64(x, 64-53);
+	return r;	
+}
+
+uint64_t bmw_large_s3(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 2)
+		^ SHL64(x, 2)
+		^ ROTL64(x, 28)
+		^ ROTR64(x, 64-59);
+	return r;	
+}
+
+uint64_t bmw_large_s4(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 1)
+		^ x;
+	return r;	
+}
+
+uint64_t bmw_large_s5(uint64_t x){
+	uint64_t r;
+	r =   SHR64(x, 2)
+		^ x;
+	return r;	
+}
+
+uint64_t bmw_large_r1(uint64_t x){
+	uint64_t r;
+	r =   ROTL64(x, 5);
+	return r;	
+}
+
+uint64_t bmw_large_r2(uint64_t x){
+	uint64_t r;
+	r =   ROTL64(x, 11);
+	return r;	
+}
+
+uint64_t bmw_large_r3(uint64_t x){
+	uint64_t r;
+	r =   ROTL64(x, 27);
+	return r;	
+}
+
+uint64_t bmw_large_r4(uint64_t x){
+	uint64_t r;
+	r =   ROTL64(x, 32);
+	return r;	
+}
+
+uint64_t bmw_large_r5(uint64_t x){
+	uint64_t r;
+	r =   ROTR64(x, 64-37);
+	return r;	
+}
+
+uint64_t bmw_large_r6(uint64_t x){
+	uint64_t r;
+	r =   ROTR64(x, 64-43);
+	return r;	
+}
+
+uint64_t bmw_large_r7(uint64_t x){
+	uint64_t r;
+	r =   ROTR64(x, 64-53);
+	return r;	
+}
+/*
+#define K    0x0555555555555555LL
+#define MASK 0xFFFFFFFFFFFFFFFFLL
+static
+uint64_t k_lut[] PROGMEM = {
+	16LL*K, 17LL*K, 18LL*K, 19LL*K, 
+	20LL*K, 21LL*K, 22LL*K, 23LL*K,
+	24LL*K, 25LL*K, 26LL*K, 27LL*K,
+	28LL*K, 29LL*K, 30LL*K, 31LL*K };
+*/	
+/* same values as the commented-out table above: k_lut[j] = (16+j)*K mod 2^64, precomputed to avoid compiler warnings; const so it can live in flash */
+static
+const uint64_t k_lut[] PROGMEM = {
+	0x5555555555555550LL, 0x5aaaaaaaaaaaaaa5LL, 0x5ffffffffffffffaLL,
+	0x655555555555554fLL, 0x6aaaaaaaaaaaaaa4LL, 0x6ffffffffffffff9LL,
+	0x755555555555554eLL, 0x7aaaaaaaaaaaaaa3LL, 0x7ffffffffffffff8LL,
+	0x855555555555554dLL, 0x8aaaaaaaaaaaaaa2LL, 0x8ffffffffffffff7LL,
+	0x955555555555554cLL, 0x9aaaaaaaaaaaaaa1LL, 0x9ffffffffffffff6LL,
+	0xa55555555555554bLL };
+
+uint64_t bmw_large_expand1(uint8_t j, const uint64_t* q, const void* m){
+	uint64_t(*s[])(uint64_t) = {bmw_large_s1, bmw_large_s2, bmw_large_s3, bmw_large_s0};
+	union{
+		uint64_t v64;
+		uint32_t v32[2];
+	} r;
+	uint8_t i;
+	/* r = 0x0555555555555555LL*(j+16); */
+	r.v32[0] = pgm_read_dword(((uint8_t*)k_lut+8*j));
+	r.v32[1] = pgm_read_dword(((uint8_t*)k_lut+8*j+4));
+	for(i=0; i<16; ++i){
+		r.v64 += s[i%4](q[j+i]);
+	}
+	r.v64 += ((uint64_t*)m)[j];
+	r.v64 += ((uint64_t*)m)[j+3];
+	r.v64 -= ((uint64_t*)m)[j+10];
+	return r.v64;
+}
+
+uint64_t bmw_large_expand2(uint8_t j, const uint64_t* q, const void* m){
+	uint64_t(*rf[])(uint64_t) = {bmw_large_r1, bmw_large_r2, bmw_large_r3,
+	                             bmw_large_r4, bmw_large_r5, bmw_large_r6,
+							     bmw_large_r7};
+	union{
+		uint64_t v64;
+		uint32_t v32[2];
+	} r;
+	uint8_t i;
+	/* r = 0x0555555555555555LL*(j+16); */
+	r.v32[0] = pgm_read_dword(((uint8_t*)k_lut+8*j));
+	r.v32[1] = pgm_read_dword(((uint8_t*)k_lut+8*j+4));
+	for(i=0; i<14; i+=2){
+		r.v64 += q[j+i];
+	}
+	for(i=0; i<14; i+=2){
+		r.v64 += rf[i/2](q[j+i+1]);
+	}
+	r.v64 += bmw_large_s5(q[j+14]);
+	r.v64 += bmw_large_s4(q[j+15]);
+	r.v64 += ((uint64_t*)m)[j];
+	r.v64 += ((uint64_t*)m)[(j+3)%16];
+	r.v64 -= ((uint64_t*)m)[(j+10)%16];
+	return r.v64;
+}
+
+#if F0_HACK
+static /* f0 term table in flash: 5 entries per q[j]; entry = (index into h)<<1 | sign, sign 1 = subtract */
+const uint8_t f0_lut[] PROGMEM ={
+	 5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
+	 6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
+	 0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
+	 0<<1, ( 1<<1)+1, ( 8<<1)+0, (10<<1)+1, (13<<1)+0,
+	 1<<1, ( 2<<1)+0, ( 9<<1)+0, (11<<1)+1, (14<<1)+1,
+	 3<<1, ( 2<<1)+1, (10<<1)+0, (12<<1)+1, (15<<1)+0,
+	 4<<1, ( 0<<1)+1, ( 3<<1)+1, (11<<1)+1, (13<<1)+0,
+	 1<<1, ( 4<<1)+1, ( 5<<1)+1, (12<<1)+1, (14<<1)+1,
+	 2<<1, ( 5<<1)+1, ( 6<<1)+1, (13<<1)+0, (15<<1)+1,
+	 0<<1, ( 3<<1)+1, ( 6<<1)+0, ( 7<<1)+1, (14<<1)+0,
+	 8<<1, ( 1<<1)+1, ( 4<<1)+1, ( 7<<1)+1, (15<<1)+0,
+	 8<<1, ( 0<<1)+1, ( 2<<1)+1, ( 5<<1)+1, ( 9<<1)+0,
+	 1<<1, ( 3<<1)+0, ( 6<<1)+1, ( 9<<1)+1, (10<<1)+0,
+	 2<<1, ( 4<<1)+0, ( 7<<1)+0, (10<<1)+0, (11<<1)+0,
+	 3<<1, ( 5<<1)+1, ( 8<<1)+0, (11<<1)+1, (12<<1)+1,
+	12<<1, ( 4<<1)+1, ( 6<<1)+1, ( 9<<1)+1, (13<<1)+0
+};
+
+void bmw_large_f0(uint64_t* q, uint64_t* h, const void* m){
+	uint8_t i,j=-1,v,sign,l=0;
+	uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
+	                           bmw_large_s3, bmw_large_s4 };
+	for(i=0; i<16; ++i){
+		h[i] ^= ((uint64_t*)m)[i];
+	}
+	dump_x(h, 16, 'T');
+//	memset(q, 0, 4*16);
+	for(i=0; i<5*16; ++i){
+		v = pgm_read_byte(f0_lut+i);
+		sign = v&1;
+		v >>=1;
+		if(i==l){
+			j++;
+			l+=5;
+			q[j] = h[v];
+			continue;
+		}
+		if(sign){
+			q[j] -= h[v];
+		}else{
+			q[j] += h[v];
+		}
+	}
+	dump_x(q, 16, 'W');
+	for(i=0; i<16; ++i){
+		q[i] = s[i%5](q[i]);
+	}	
+}
+
+#else
+void bmw_large_f0(uint64_t* q, uint64_t* h, const void* m){ /* table-free f0: fills q[0..15] from h^m; h is modified in place */
+	uint8_t i;
+	uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
+	                           bmw_large_s3, bmw_large_s4 };
+	for(i=0; i<16; ++i){ /* h <- h XOR m, word by word */
+		h[i] ^= ((uint64_t*)m)[i];
+	}
+	dump_x(h, 16, 'T'); /* debug dump of the XORed state held in h */
+	q[ 0] = (h[ 5] - h[ 7] + h[10] + h[13] + h[14]);
+	q[ 1] = (h[ 6] - h[ 8] + h[11] + h[14] - h[15]);
+	q[ 2] = (h[ 0] + h[ 7] + h[ 9] - h[12] + h[15]);
+	q[ 3] = (h[ 0] - h[ 1] + h[ 8] - h[10] + h[13]);
+	q[ 4] = (h[ 1] + h[ 2] + h[ 9] - h[11] - h[14]);
+	q[ 5] = (h[ 3] - h[ 2] + h[10] - h[12] + h[15]);
+	q[ 6] = (h[ 4] - h[ 0] - h[ 3] - h[11] + h[13]);
+	q[ 7] = (h[ 1] - h[ 4] - h[ 5] - h[12] - h[14]);
+	q[ 8] = (h[ 2] - h[ 5] - h[ 6] + h[13] - h[15]);
+	q[ 9] = (h[ 0] - h[ 3] + h[ 6] - h[ 7] + h[14]);
+	q[10] = (h[ 8] - h[ 1] - h[ 4] - h[ 7] + h[15]);
+	q[11] = (h[ 8] - h[ 0] - h[ 2] - h[ 5] + h[ 9]);
+	q[12] = (h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]);
+	q[13] = (h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]);
+	q[14] = (h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]);
+	q[15] = (h[12] - h[ 4] - h[ 6] - h[ 9] + h[13]);
+	dump_x(q, 16, 'W');
+	for(i=0; i<16; ++i){ /* q[i] <- s_(i mod 5)(q[i]) */
+		q[i] = s[i%5](q[i]);
+	}
+}
+#endif
+
+void bmw_large_f1(uint64_t* q, const void* m){
+	uint8_t i;
+	q[16] = bmw_large_expand1(0, q, m);
+	q[17] = bmw_large_expand1(1, q, m);
+	for(i=2; i<16; ++i){
+		q[16+i] = bmw_large_expand2(i, q, m);
+	}
+}
+
+void bmw_large_f2(uint64_t* h, const uint64_t* q, const void* m){ /* folding step: writes new h[0..15] from q[0..31] and m */
+	uint64_t xl=0, xh;
+	uint8_t i;
+	for(i=16;i<24;++i){ /* xl = XOR of q[16..23] */
+		xl ^= q[i];
+	}
+	xh = xl;
+	for(i=24;i<32;++i){ /* xh = xl XOR q[24..31], i.e. XOR of q[16..31] */
+		xh ^= q[i];
+	}
+#if DEBUG
+	cli_putstr_P(PSTR("\r\n XL = "));
+	cli_hexdump_rev(&xl, 8); /* xl is 64 bits wide: dump all 8 bytes */
+	cli_putstr_P(PSTR("\r\n XH = "));
+	cli_hexdump_rev(&xh, 8); /* xh is 64 bits wide: dump all 8 bytes */
+#endif
+	memcpy(h, m, 16*8); /* start from the message block: h[0..15] = m[0..15] */
+	h[0] ^= SHL64(xh, 5) ^ SHR64(q[16], 5);
+	h[1] ^= SHR64(xh, 7) ^ SHL64(q[17], 8);
+	h[2] ^= SHR64(xh, 5) ^ SHL64(q[18], 5);
+	h[3] ^= SHR64(xh, 1) ^ SHL64(q[19], 5);
+	h[4] ^= SHR64(xh, 3) ^ q[20];
+	h[5] ^= SHL64(xh, 6) ^ SHR64(q[21], 6);
+	h[6] ^= SHR64(xh, 4) ^ SHL64(q[22], 6);
+	h[7] ^= SHR64(xh,11) ^ SHL64(q[23], 2);
+	for(i=0; i<8; ++i){ /* lower half is final after this loop */
+		h[i] += xl ^ q[24+i] ^ q[i];
+	}
+	for(i=0; i<8; ++i){ /* upper half reads the finished lower half, rotated by 9..16 */
+		h[8+i] ^= xh ^ q[24+i];
+		h[8+i] += ROTL64(h[(4+i)%8],i+9);
+	}
+	h[ 8] += SHL64(xl, 8) ^ q[23] ^ q[ 8];
+	h[ 9] += SHR64(xl, 6) ^ q[16] ^ q[ 9];
+	h[10] += SHL64(xl, 6) ^ q[17] ^ q[10];
+	h[11] += SHL64(xl, 4) ^ q[18] ^ q[11];
+	h[12] += SHR64(xl, 3) ^ q[19] ^ q[12];
+	h[13] += SHR64(xl, 4) ^ q[20] ^ q[13];
+	h[14] += SHR64(xl, 7) ^ q[21] ^ q[14];
+	h[15] += SHR64(xl, 2) ^ q[22] ^ q[15];
+}
+
+void bmw_large_nextBlock(bmw_large_ctx_t* ctx, const void* block){
+	uint64_t q[32];
+	dump_x(block, 16, 'M');
+	bmw_large_f0(q, ctx->h, block);
+	dump_x(q, 16, 'Q');
+	bmw_large_f1(q, block);
+	dump_x(q, 32, 'Q');
+	bmw_large_f2(ctx->h, q, block);
+	ctx->counter += 1;
+	ctx_dump(ctx);
+}
+
+void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b){ /* hash the final length_b bits of block, with padding */
+	uint8_t buffer[128]; /* one padded 1024-bit block */
+	while(length_b >= BMW_LARGE_BLOCKSIZE){ /* length_b counts bits; consume whole blocks first */
+		bmw_large_nextBlock(ctx, block);
+		length_b -= BMW_LARGE_BLOCKSIZE;
+		block = (uint8_t*)block + BMW_LARGE_BLOCKSIZE_B;
+	}
+	memset(buffer, 0, 128);
+	memcpy(buffer, block, (length_b+7)/8); /* remaining bytes, bit count rounded up; NOTE(review): bits past length_b in the last byte are copied unmasked - presumably the caller supplies them as zero */
+	buffer[length_b>>3] |= 0x80 >> (length_b&0x07); /* single padding 1-bit right after the last message bit (MSB-first within the byte) */
+	if(length_b+1>128*8-64){ /* message bits + padding bit leave no room for the 64-bit length field */
+		bmw_large_nextBlock(ctx, buffer);
+		memset(buffer, 0, 128-8);
+		ctx->counter -= 1; /* nextBlock incremented counter; do not count this extra block in the length below */
+	}
+	*((uint64_t*)&(buffer[128-8])) = (uint64_t)(ctx->counter*1024LL)+(uint64_t)length_b; /* last 8 bytes: total length in bits, native byte order */
+	bmw_large_nextBlock(ctx, buffer);
+}
+
+void bmw384_init(bmw384_ctx_t* ctx){
+	uint8_t i;
+	ctx->h[0] = 0x0001020304050607LL;
+	for(i=1; i<16; ++i){
+		ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
+	}
+#if BUG24	
+	ctx->h[6] = 0x3031323324353637LL;
+#endif
+	ctx->counter=0;
+	ctx_dump(ctx);
+}
+
+void bmw512_init(bmw512_ctx_t* ctx){
+	uint8_t i;
+	ctx->h[0] = 0x8081828384858687LL;
+	for(i=1; i<16; ++i){
+		ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
+	}
+	ctx->counter=0;
+	ctx_dump(ctx);
+}
+
+void bmw384_nextBlock(bmw384_ctx_t* ctx, const void* block){
+	bmw_large_nextBlock(ctx, block);
+}
+
+void bmw512_nextBlock(bmw512_ctx_t* ctx, const void* block){
+	bmw_large_nextBlock(ctx, block);
+}
+
+void bmw384_lastBlock(bmw384_ctx_t* ctx, const void* block, uint16_t length_b){
+	bmw_large_lastBlock(ctx, block, length_b);
+}
+
+void bmw512_lastBlock(bmw512_ctx_t* ctx, const void* block, uint16_t length_b){
+	bmw_large_lastBlock(ctx, block, length_b);
+}
+
+void bmw384_ctx2hash(void* dest, const bmw384_ctx_t* ctx){
+	memcpy(dest, &(ctx->h[10]), 384/8);
+}
+
+void bmw512_ctx2hash(void* dest, const bmw512_ctx_t* ctx){
+	memcpy(dest, &(ctx->h[8]), 512/8);
+}
+
+void bmw384(void* dest, const void* msg, uint32_t length_b){
+	bmw_large_ctx_t ctx;
+	bmw384_init(&ctx);
+	while(length_b>=BMW_LARGE_BLOCKSIZE){
+		bmw_large_nextBlock(&ctx, msg);
+		length_b -= BMW_LARGE_BLOCKSIZE;
+		msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
+	}
+	bmw_large_lastBlock(&ctx, msg, length_b);
+	bmw384_ctx2hash(dest, &ctx);
+}
+
+void bmw512(void* dest, const void* msg, uint32_t length_b){
+	bmw_large_ctx_t ctx;
+	bmw512_init(&ctx);
+	while(length_b>=BMW_LARGE_BLOCKSIZE){
+		bmw_large_nextBlock(&ctx, msg);
+		length_b -= BMW_LARGE_BLOCKSIZE;
+		msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
+	}
+	bmw_large_lastBlock(&ctx, msg, length_b);
+	bmw512_ctx2hash(dest, &ctx);
+}
+
diff --git a/bmw/bmw_large.h b/bmw/bmw_large.h
new file mode 100644
index 0000000..fba01fb
--- /dev/null
+++ b/bmw/bmw_large.h
@@ -0,0 +1,65 @@
+/* bmw_large.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \file    bmw_large.h
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2009-04-27
+ * \license GPLv3 or later
+ * 
+ */
+#ifndef BMW_LARGE_H_
+#define BMW_LARGE_H_
+
+#include <stdint.h>
+
+#define BMW_LARGE_BLOCKSIZE   1024
+#define BMW_LARGE_BLOCKSIZE_B ((BMW_LARGE_BLOCKSIZE+7)/8)
+#define BMW384_BLOCKSIZE      BMW_LARGE_BLOCKSIZE
+#define BMW384_BLOCKSIZE_B    BMW_LARGE_BLOCKSIZE_B
+#define BMW512_BLOCKSIZE      BMW_LARGE_BLOCKSIZE
+#define BMW512_BLOCKSIZE_B    BMW_LARGE_BLOCKSIZE_B
+
+typedef struct {
+	uint64_t h[16];
+	uint32_t counter;
+} bmw_large_ctx_t;
+
+typedef bmw_large_ctx_t bmw384_ctx_t;
+typedef bmw_large_ctx_t bmw512_ctx_t;
+
+void bmw384_init(bmw384_ctx_t* ctx);
+void bmw512_init(bmw512_ctx_t* ctx);
+
+void bmw_large_nextBlock(bmw_large_ctx_t* ctx, const void* block);
+void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b);
+
+void bmw384_nextBlock(bmw384_ctx_t* ctx, const void* block);
+void bmw384_lastBlock(bmw384_ctx_t* ctx, const void* block, uint16_t length_b);
+
+void bmw512_nextBlock(bmw512_ctx_t* ctx, const void* block);
+void bmw512_lastBlock(bmw512_ctx_t* ctx, const void* block, uint16_t length_b);
+
+void bmw384_ctx2hash(void* dest, const bmw384_ctx_t* ctx);
+void bmw512_ctx2hash(void* dest, const bmw512_ctx_t* ctx);
+
+void bmw384(void* dest, const void* msg, uint32_t length_b);
+void bmw512(void* dest, const void* msg, uint32_t length_b);
+
+#endif /* BMW_LARGE_H_ */
diff --git a/bmw/bmw_small.c b/bmw/bmw_small.c
new file mode 100644
index 0000000..aefd592
--- /dev/null
+++ b/bmw/bmw_small.c
@@ -0,0 +1,461 @@
+/* bmw_small.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \file    bmw_small.c
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2009-04-27
+ * \license GPLv3 or later
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "bmw_small.h"
+
+
+#define SHL32(a,n) ((a)<<(n))
+#define SHR32(a,n) ((a)>>(n))
+#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n))))
+#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n))))
+
+#define BUG24   0
+#define F0_HACK 1
+
+#define DEBUG 0
+#if DEBUG
+ #include "cli.h"
+ 
+ void ctx_dump(const bmw_small_ctx_t* ctx){
+ 	uint8_t i;
+	cli_putstr_P(PSTR("\r\n==== ctx dump ===="));
+	for(i=0; i<16;++i){
+		cli_putstr_P(PSTR("\r\n h["));
+		cli_hexdump(&i, 1);
+		cli_putstr_P(PSTR("] = "));
+		cli_hexdump_rev(&(ctx->h[i]), 4);
+	}
+	cli_putstr_P(PSTR("\r\n counter = "));
+	cli_hexdump(&(ctx->counter), 4);
+ }
+ 
+ void dump_x(const uint32_t* q, uint8_t elements, char x){
+	uint8_t i;
+ 	cli_putstr_P(PSTR("\r\n==== "));
+	cli_putc(x);
+	cli_putstr_P(PSTR(" dump ===="));
+	for(i=0; i<elements;++i){
+		cli_putstr_P(PSTR("\r\n "));
+		cli_putc(x);
+		cli_putstr_P(PSTR("["));
+		cli_hexdump(&i, 1);
+		cli_putstr_P(PSTR("] = "));
+		cli_hexdump_rev(&(q[i]), 4);
+	}
+ }
+#else
+ #define ctx_dump(x)
+ #define dump_x(a,b,c)
+#endif
+
+uint32_t bmw_small_s0(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 1)
+		^ SHL32(x, 3)
+		^ ROTL32(x, 4)
+		^ ROTR32(x, 13);
+	return r;	
+}
+
+uint32_t bmw_small_s1(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 1)
+		^ SHL32(x, 2)
+		^ ROTL32(x, 8)
+		^ ROTR32(x, 9);
+	return r;	
+}
+
+uint32_t bmw_small_s2(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 2)
+		^ SHL32(x, 1)
+		^ ROTL32(x, 12)
+		^ ROTR32(x, 7);
+	return r;	
+}
+
+uint32_t bmw_small_s3(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 2)
+		^ SHL32(x, 2)
+		^ ROTL32(x, 15)
+		^ ROTR32(x, 3);
+	return r;	
+}
+
+uint32_t bmw_small_s4(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 1)
+		^ x;
+	return r;	
+}
+
+uint32_t bmw_small_s5(uint32_t x){
+	uint32_t r;
+	r =   SHR32(x, 2)
+		^ x;
+	return r;	
+}
+
+uint32_t bmw_small_r1(uint32_t x){
+	uint32_t r;
+	r =   ROTL32(x, 3);
+	return r;	
+}
+
+uint32_t bmw_small_r2(uint32_t x){
+	uint32_t r;
+	r =   ROTL32(x, 7);
+	return r;	
+}
+
+uint32_t bmw_small_r3(uint32_t x){
+	uint32_t r;
+	r =   ROTL32(x, 13);
+	return r;	
+}
+
+uint32_t bmw_small_r4(uint32_t x){
+	uint32_t r;
+	r =   ROTL32(x, 16);
+	return r;	
+}
+
+uint32_t bmw_small_r5(uint32_t x){
+	uint32_t r;
+	r =   ROTR32(x, 13);
+	return r;	
+}
+
+uint32_t bmw_small_r6(uint32_t x){
+	uint32_t r;
+	r =   ROTR32(x, 9);
+	return r;	
+}
+
+uint32_t bmw_small_r7(uint32_t x){
+	uint32_t r;
+	r =   ROTR32(x, 5);
+	return r;	
+}
+/*
+#define K 0x05555555L
+static
+uint32_t k_lut[] PROGMEM = {
+	16L*K, 17L*K, 18L*K, 19L*K, 20L*K, 21L*K, 22L*K, 23L*K,
+	24L*K, 25L*K, 26L*K, 27L*K, 28L*K, 29L*K, 30L*K, 31L*K
+};
+*/
+/* same values as the commented-out table above: k_lut[j] = (16+j)*K mod 2^32, precomputed to avoid compiler warnings; const so it can live in flash */
+
+static
+const uint32_t k_lut[] PROGMEM = {
+	0x55555550L, 0x5aaaaaa5L, 0x5ffffffaL,
+	0x6555554fL, 0x6aaaaaa4L, 0x6ffffff9L,
+	0x7555554eL, 0x7aaaaaa3L, 0x7ffffff8L,
+	0x8555554dL, 0x8aaaaaa2L, 0x8ffffff7L,
+	0x9555554cL, 0x9aaaaaa1L, 0x9ffffff6L,
+	0xa555554bL };
+
+
+uint32_t bmw_small_expand1(uint8_t j, const uint32_t* q, const void* m){
+	uint32_t(*s[])(uint32_t) = {bmw_small_s1, bmw_small_s2, bmw_small_s3, bmw_small_s0};
+	uint32_t r;
+	uint8_t i;
+	/* r = 0x05555555*(j+16); */
+	r = pgm_read_dword(k_lut+j);
+	for(i=0; i<16; ++i){
+		r += s[i%4](q[j+i]);
+	}
+	r += ((uint32_t*)m)[j];
+	r += ((uint32_t*)m)[j+3];
+	r -= ((uint32_t*)m)[j+10];
+	return r;
+}
+
+uint32_t bmw_small_expand2(uint8_t j, const uint32_t* q, const void* m){
+	uint32_t(*rf[])(uint32_t) = {bmw_small_r1, bmw_small_r2, bmw_small_r3,
+	                             bmw_small_r4, bmw_small_r5, bmw_small_r6,
+							     bmw_small_r7};
+	uint32_t r;
+	uint8_t i;
+	/* r = 0x05555555*(j+16); */
+	r = pgm_read_dword(k_lut+j);
+	for(i=0; i<14; i+=2){
+		r += q[j+i];
+	}
+	for(i=0; i<14; i+=2){
+		r += rf[i/2](q[j+i+1]);
+	}
+	r += bmw_small_s5(q[j+14]);
+	r += bmw_small_s4(q[j+15]);
+	r += ((uint32_t*)m)[j];
+	r += ((uint32_t*)m)[(j+3)%16];
+	r -= ((uint32_t*)m)[(j+10)%16];
+	return r;
+}
+#if F0_HACK
+static /* f0 term table in flash: 5 entries per q[j]; entry = (index into h)<<1 | sign, sign 1 = subtract */
+const uint8_t f0_lut[] PROGMEM = {
+	 5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
+	 6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
+	 0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
+	 0<<1, ( 1<<1)+1, ( 8<<1)+0, (10<<1)+1, (13<<1)+0,
+	 1<<1, ( 2<<1)+0, ( 9<<1)+0, (11<<1)+1, (14<<1)+1,
+	 3<<1, ( 2<<1)+1, (10<<1)+0, (12<<1)+1, (15<<1)+0,
+	 4<<1, ( 0<<1)+1, ( 3<<1)+1, (11<<1)+1, (13<<1)+0,
+	 1<<1, ( 4<<1)+1, ( 5<<1)+1, (12<<1)+1, (14<<1)+1,
+	 2<<1, ( 5<<1)+1, ( 6<<1)+1, (13<<1)+0, (15<<1)+1,
+	 0<<1, ( 3<<1)+1, ( 6<<1)+0, ( 7<<1)+1, (14<<1)+0,
+	 8<<1, ( 1<<1)+1, ( 4<<1)+1, ( 7<<1)+1, (15<<1)+0,
+	 8<<1, ( 0<<1)+1, ( 2<<1)+1, ( 5<<1)+1, ( 9<<1)+0,
+	 1<<1, ( 3<<1)+0, ( 6<<1)+1, ( 9<<1)+1, (10<<1)+0,
+	 2<<1, ( 4<<1)+0, ( 7<<1)+0, (10<<1)+0, (11<<1)+0,
+	 3<<1, ( 5<<1)+1, ( 8<<1)+0, (11<<1)+1, (12<<1)+1,
+	12<<1, ( 4<<1)+1, ( 6<<1)+1, ( 9<<1)+1, (13<<1)+0
+};
+
+void bmw_small_f0(uint32_t* q, uint32_t* h, const void* m){
+	uint8_t i,j=-1,v,sign,l=0;
+	uint32_t(*s[])(uint32_t)={ bmw_small_s0, bmw_small_s1, bmw_small_s2,
+	                           bmw_small_s3, bmw_small_s4 };
+	for(i=0; i<16; ++i){
+		h[i] ^= ((uint32_t*)m)[i];
+	}
+	dump_x(h, 16, 'T');
+	// memset(q, 0, 4*16);
+	for(i=0; i<5*16; ++i){
+		v = pgm_read_byte(f0_lut+i);
+		sign = v&1;
+		v >>=1;
+		if(i==l){
+			j++;
+			l+=5;
+			q[j] = h[v];
+			continue;
+		}
+		if(sign){
+			q[j] -= h[v];
+		}else{
+			q[j] += h[v];
+		}
+	}
+	dump_x(q, 16, 'W');
+	for(i=0; i<16; ++i){
+		q[i] = s[i%5](q[i]);
+	}	
+}
+
+#else
+void bmw_small_f0(uint32_t* q, uint32_t* h, const void* m){
+	uint8_t i;
+	uint32_t(*s[])(uint32_t)={ bmw_small_s0, bmw_small_s1, bmw_small_s2,
+	                           bmw_small_s3, bmw_small_s4 };
+	for(i=0; i<16; ++i){
+		h[i] ^= ((uint32_t*)m)[i];
+	}
+	dump_x(h, 16, 'T');
+	q[ 0] = (h[ 5] - h[ 7] + h[10] + h[13] + h[14]);
+	q[ 1] = (h[ 6] - h[ 8] + h[11] + h[14] - h[15]);
+	q[ 2] = (h[ 0] + h[ 7] + h[ 9] - h[12] + h[15]);
+	q[ 3] = (h[ 0] - h[ 1] + h[ 8] - h[10] + h[13]);
+	q[ 4] = (h[ 1] + h[ 2] + h[ 9] - h[11] - h[14]);
+	q[ 5] = (h[ 3] - h[ 2] + h[10] - h[12] + h[15]);
+	q[ 6] = (h[ 4] - h[ 0] - h[ 3] - h[11] + h[13]); 
+	q[ 7] = (h[ 1] - h[ 4] - h[ 5] - h[12] - h[14]);
+	q[ 8] = (h[ 2] - h[ 5] - h[ 6] + h[13] - h[15]);
+	q[ 9] = (h[ 0] - h[ 3] + h[ 6] - h[ 7] + h[14]);
+	q[10] = (h[ 8] - h[ 1] - h[ 4] - h[ 7] + h[15]);
+	q[11] = (h[ 8] - h[ 0] - h[ 2] - h[ 5] + h[ 9]);
+	q[12] = (h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]);
+	q[13] = (h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]);
+	q[14] = (h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]);
+	q[15] = (h[12] - h[ 4] - h[ 6] - h[ 9] + h[13]); 
+	dump_x(q, 16, 'W');
+	for(i=0; i<16; ++i){
+		q[i] = s[i%5](q[i]);
+	}	
+}
+#endif
+
+void bmw_small_f1(uint32_t* q, const void* m){
+	uint8_t i;
+	q[16] = bmw_small_expand1(0, q, m);
+	q[17] = bmw_small_expand1(1, q, m);
+	for(i=2; i<16; ++i){
+		q[16+i] = bmw_small_expand2(i, q, m);
+	}
+}
+
+void bmw_small_f2(uint32_t* h, const uint32_t* q, const void* m){
+	uint32_t xl=0, xh;
+	uint8_t i;
+	for(i=16;i<24;++i){
+		xl ^= q[i];
+	}
+	xh = xl;
+	for(i=24;i<32;++i){
+		xh ^= q[i];
+	}
+#if DEBUG	
+	cli_putstr_P(PSTR("\r\n XL = "));
+	cli_hexdump_rev(&xl, 4);
+	cli_putstr_P(PSTR("\r\n XH = "));
+	cli_hexdump_rev(&xh, 4);
+#endif
+	memcpy(h, m, 16*4);
+	h[0] ^= SHL32(xh, 5) ^ SHR32(q[16], 5);
+	h[1] ^= SHR32(xh, 7) ^ SHL32(q[17], 8);
+	h[2] ^= SHR32(xh, 5) ^ SHL32(q[18], 5);
+	h[3] ^= SHR32(xh, 1) ^ SHL32(q[19], 5);
+	h[4] ^= SHR32(xh, 3) ^ q[20];
+	h[5] ^= SHL32(xh, 6) ^ SHR32(q[21], 6);
+	h[6] ^= SHR32(xh, 4) ^ SHL32(q[22], 6);
+	h[7] ^= SHR32(xh,11) ^ SHL32(q[23], 2);
+	for(i=0; i<8; ++i){
+		h[i] += xl ^ q[24+i] ^ q[i];
+	}
+	for(i=0; i<8; ++i){
+		h[8+i] ^= xh ^ q[24+i];
+		h[8+i] += ROTL32(h[(4+i)%8],i+9);
+	}
+	h[ 8] += SHL32(xl, 8) ^ q[23] ^ q[ 8];
+	h[ 9] += SHR32(xl, 6) ^ q[16] ^ q[ 9];
+	h[10] += SHL32(xl, 6) ^ q[17] ^ q[10];
+	h[11] += SHL32(xl, 4) ^ q[18] ^ q[11];
+	h[12] += SHR32(xl, 3) ^ q[19] ^ q[12];
+	h[13] += SHR32(xl, 4) ^ q[20] ^ q[13];
+	h[14] += SHR32(xl, 7) ^ q[21] ^ q[14];
+	h[15] += SHR32(xl, 2) ^ q[22] ^ q[15];
+}
+
+void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){
+	uint32_t q[32];
+	dump_x(block, 16, 'M');
+	bmw_small_f0(q, ctx->h, block);
+	dump_x(q, 16, 'Q');
+	bmw_small_f1(q, block);
+	dump_x(q, 32, 'Q');
+	bmw_small_f2(ctx->h, q, block);
+	ctx->counter += 1;
+	ctx_dump(ctx);
+}
+
+void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b){ /* hash the final length_b bits of block, with padding */
+	uint8_t buffer[64]; /* one padded 512-bit block */
+	while(length_b >= BMW_SMALL_BLOCKSIZE){ /* length_b counts bits; consume whole blocks first */
+		bmw_small_nextBlock(ctx, block);
+		length_b -= BMW_SMALL_BLOCKSIZE;
+		block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
+	}
+	memset(buffer, 0, 64);
+	memcpy(buffer, block, (length_b+7)/8); /* remaining bytes, bit count rounded up; NOTE(review): bits past length_b in the last byte are copied unmasked - presumably the caller supplies them as zero */
+	buffer[length_b>>3] |= 0x80 >> (length_b&0x07); /* single padding 1-bit right after the last message bit (MSB-first within the byte) */
+	if(length_b+1>64*8-64){ /* message bits + padding bit leave no room for the 64-bit length field */
+		bmw_small_nextBlock(ctx, buffer);
+		memset(buffer, 0, 64-8);
+		ctx->counter -= 1; /* nextBlock incremented counter; do not count this extra block in the length below */
+	}
+	*((uint64_t*)&(buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b; /* last 8 bytes: total length in bits, native byte order */
+	bmw_small_nextBlock(ctx, buffer);
+}
+
+void bmw224_init(bmw224_ctx_t* ctx){
+	uint8_t i;
+	ctx->h[0] = 0x00010203;
+	for(i=1; i<16; ++i){
+		ctx->h[i] = ctx->h[i-1]+ 0x04040404;
+	}
+#if BUG24	
+	ctx->h[13] = 0x24353637;
+#endif
+	ctx->counter=0;
+	ctx_dump(ctx);
+}
+
+void bmw256_init(bmw256_ctx_t* ctx){
+	uint8_t i;
+	ctx->h[0] = 0x40414243;
+	for(i=1; i<16; ++i){
+		ctx->h[i] = ctx->h[i-1]+ 0x04040404;
+	}
+	ctx->counter=0;
+	ctx_dump(ctx);
+}
+
+void bmw224_nextBlock(bmw224_ctx_t* ctx, const void* block){
+	bmw_small_nextBlock(ctx, block);
+}
+
+void bmw256_nextBlock(bmw256_ctx_t* ctx, const void* block){
+	bmw_small_nextBlock(ctx, block);
+}
+
+void bmw224_lastBlock(bmw224_ctx_t* ctx, const void* block, uint16_t length_b){
+	bmw_small_lastBlock(ctx, block, length_b);
+}
+
+void bmw256_lastBlock(bmw256_ctx_t* ctx, const void* block, uint16_t length_b){
+	bmw_small_lastBlock(ctx, block, length_b);
+}
+
+void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx){
+	memcpy(dest, &(ctx->h[9]), 224/8);
+}
+
+void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx){
+	memcpy(dest, &(ctx->h[8]), 256/8);
+}
+
+void bmw224(void* dest, const void* msg, uint32_t length_b){
+	bmw_small_ctx_t ctx;
+	bmw224_init(&ctx);
+	while(length_b>=BMW_SMALL_BLOCKSIZE){
+		bmw_small_nextBlock(&ctx, msg);
+		length_b -= BMW_SMALL_BLOCKSIZE;
+		msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
+	}
+	bmw_small_lastBlock(&ctx, msg, length_b);
+	bmw224_ctx2hash(dest, &ctx);
+}
+
+void bmw256(void* dest, const void* msg, uint32_t length_b){
+	bmw_small_ctx_t ctx;
+	bmw256_init(&ctx);
+	while(length_b>=BMW_SMALL_BLOCKSIZE){
+		bmw_small_nextBlock(&ctx, msg);
+		length_b -= BMW_SMALL_BLOCKSIZE;
+		msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
+	}
+	bmw_small_lastBlock(&ctx, msg, length_b);
+	bmw256_ctx2hash(dest, &ctx);
+}
+
diff --git a/bmw/bmw_small.h b/bmw/bmw_small.h
new file mode 100644
index 0000000..f314712
--- /dev/null
+++ b/bmw/bmw_small.h
@@ -0,0 +1,65 @@
+/* bmw_small.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * \file    bmw_small.h
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2009-04-27
+ * \license GPLv3 or later
+ * 
+ */
+#ifndef BMW_SMALL_H_
+#define BMW_SMALL_H_
+
+#include <stdint.h>
+
+#define BMW_SMALL_BLOCKSIZE   512 /* input block size in bits */
+#define BMW_SMALL_BLOCKSIZE_B ((BMW_SMALL_BLOCKSIZE+7)/8) /* the same size in bytes, rounded up (64) */
+#define BMW224_BLOCKSIZE      BMW_SMALL_BLOCKSIZE /* BMW-224 uses the common block size */
+#define BMW224_BLOCKSIZE_B    BMW_SMALL_BLOCKSIZE_B
+#define BMW256_BLOCKSIZE      BMW_SMALL_BLOCKSIZE /* BMW-256 uses the common block size */
+#define BMW256_BLOCKSIZE_B    BMW_SMALL_BLOCKSIZE_B
+
+typedef struct { /* hashing state shared by BMW-224 and BMW-256 */
+	uint32_t h[16]; /* sixteen 32-bit state words; the ctx2hash functions copy the hash value out of the upper ones */
+	uint32_t counter; /* cleared by the init functions; bmw_small_lastBlock multiplies it by 512 when it builds the length field */
+} bmw_small_ctx_t;
+
+typedef bmw_small_ctx_t bmw224_ctx_t; /* BMW-224 uses the common context layout */
+typedef bmw_small_ctx_t bmw256_ctx_t; /* BMW-256 uses the common context layout */
+
+void bmw224_init(bmw224_ctx_t* ctx); /* loads the BMW-224 start values into ctx->h and clears ctx->counter */
+void bmw256_init(bmw256_ctx_t* ctx); /* loads the BMW-256 start values into ctx->h and clears ctx->counter */
+
+void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block); /* block: one full block of BMW_SMALL_BLOCKSIZE_B bytes */
+void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b); /* length_b: remaining message length in bits */
+
+void bmw224_nextBlock(bmw224_ctx_t* ctx, const void* block); /* forwards to bmw_small_nextBlock */
+void bmw224_lastBlock(bmw224_ctx_t* ctx, const void* block, uint16_t length_b); /* forwards to bmw_small_lastBlock */
+
+void bmw256_nextBlock(bmw256_ctx_t* ctx, const void* block); /* forwards to bmw_small_nextBlock */
+void bmw256_lastBlock(bmw256_ctx_t* ctx, const void* block, uint16_t length_b); /* forwards to bmw_small_lastBlock */
+
+void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx); /* copies 224/8 = 28 bytes to dest */
+void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx); /* copies 256/8 = 32 bytes to dest */
+
+void bmw224(void* dest, const void* msg, uint32_t length_b); /* one-shot: init, hash length_b bits of msg, write 28 bytes to dest */
+void bmw256(void* dest, const void* msg, uint32_t length_b); /* one-shot: init, hash length_b bits of msg, write 32 bytes to dest */
+
+#endif /* BMW_SMALL_H_ */
diff --git a/bmw_large.c b/bmw_large.c
deleted file mode 100644
index 10e65f6..0000000
--- a/bmw_large.c
+++ /dev/null
@@ -1,468 +0,0 @@
-/* bmw_large.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * \file    bmw_large.c
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \date    2009-04-27
- * \license GPLv3 or later
- * 
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "bmw_large.h"
-
-#define SHL64(a,n) ((a)<<(n))
-#define SHR64(a,n) ((a)>>(n))
-#define ROTL64(a,n) (((a)<<(n))|((a)>>(64-(n))))
-#define ROTR64(a,n) (((a)>>(n))|((a)<<(64-(n))))
-
-#define BUG24 0
-#define F0_HACK 1
-#define DEBUG 0
-#if DEBUG
- #include "cli.h"
- 
- void ctx_dump(const bmw_large_ctx_t* ctx){
- 	uint8_t i;
-	cli_putstr_P(PSTR("\r\n==== ctx dump ===="));
-	for(i=0; i<16;++i){
-		cli_putstr_P(PSTR("\r\n h["));
-		cli_hexdump(&i, 1);
-		cli_putstr_P(PSTR("] = "));
-		cli_hexdump_rev(&(ctx->h[i]), 8);
-	}
-	cli_putstr_P(PSTR("\r\n counter = "));
-	cli_hexdump(&(ctx->counter), 4);
- }
- 
- void dump_x(const uint64_t* q, uint8_t elements, char x){
-	uint8_t i;
- 	cli_putstr_P(PSTR("\r\n==== "));
-	cli_putc(x);
-	cli_putstr_P(PSTR(" dump ===="));
-	for(i=0; i<elements;++i){
-		cli_putstr_P(PSTR("\r\n "));
-		cli_putc(x);
-		cli_putstr_P(PSTR("["));
-		cli_hexdump(&i, 1);
-		cli_putstr_P(PSTR("] = "));
-		cli_hexdump_rev(&(q[i]), 8);
-	}
- }
-#else
- #define ctx_dump(x)
- #define dump_x(a,b,c)
-#endif
-
-uint64_t bmw_large_s0(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 1)
-		^ SHL64(x, 3)
-		^ ROTL64(x, 4)
-		^ ROTR64(x, 64-37);
-	return r;	
-}
-
-uint64_t bmw_large_s1(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 1)
-		^ SHL64(x, 2)
-		^ ROTL64(x,13)
-		^ ROTR64(x,64-43);
-	return r;	
-}
-
-uint64_t bmw_large_s2(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 2)
-		^ SHL64(x, 1)
-		^ ROTL64(x, 19)
-		^ ROTR64(x, 64-53);
-	return r;	
-}
-
-uint64_t bmw_large_s3(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 2)
-		^ SHL64(x, 2)
-		^ ROTL64(x, 28)
-		^ ROTR64(x, 64-59);
-	return r;	
-}
-
-uint64_t bmw_large_s4(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 1)
-		^ x;
-	return r;	
-}
-
-uint64_t bmw_large_s5(uint64_t x){
-	uint64_t r;
-	r =   SHR64(x, 2)
-		^ x;
-	return r;	
-}
-
-uint64_t bmw_large_r1(uint64_t x){
-	uint64_t r;
-	r =   ROTL64(x, 5);
-	return r;	
-}
-
-uint64_t bmw_large_r2(uint64_t x){
-	uint64_t r;
-	r =   ROTL64(x, 11);
-	return r;	
-}
-
-uint64_t bmw_large_r3(uint64_t x){
-	uint64_t r;
-	r =   ROTL64(x, 27);
-	return r;	
-}
-
-uint64_t bmw_large_r4(uint64_t x){
-	uint64_t r;
-	r =   ROTL64(x, 32);
-	return r;	
-}
-
-uint64_t bmw_large_r5(uint64_t x){
-	uint64_t r;
-	r =   ROTR64(x, 64-37);
-	return r;	
-}
-
-uint64_t bmw_large_r6(uint64_t x){
-	uint64_t r;
-	r =   ROTR64(x, 64-43);
-	return r;	
-}
-
-uint64_t bmw_large_r7(uint64_t x){
-	uint64_t r;
-	r =   ROTR64(x, 64-53);
-	return r;	
-}
-/*
-#define K    0x0555555555555555LL
-#define MASK 0xFFFFFFFFFFFFFFFFLL
-static
-uint64_t k_lut[] PROGMEM = {
-	16LL*K, 17LL*K, 18LL*K, 19LL*K, 
-	20LL*K, 21LL*K, 22LL*K, 23LL*K,
-	24LL*K, 25LL*K, 26LL*K, 27LL*K,
-	28LL*K, 29LL*K, 30LL*K, 31LL*K };
-*/	
-/* the same as above but precomputed to avoid compiler warnings */
-static
-uint64_t k_lut[] PROGMEM = {
-	0x5555555555555550LL, 0x5aaaaaaaaaaaaaa5LL, 0x5ffffffffffffffaLL,
-	0x655555555555554fLL, 0x6aaaaaaaaaaaaaa4LL, 0x6ffffffffffffff9LL,
-	0x755555555555554eLL, 0x7aaaaaaaaaaaaaa3LL, 0x7ffffffffffffff8LL,
-	0x855555555555554dLL, 0x8aaaaaaaaaaaaaa2LL, 0x8ffffffffffffff7LL,
-	0x955555555555554cLL, 0x9aaaaaaaaaaaaaa1LL, 0x9ffffffffffffff6LL,
-	0xa55555555555554bLL };
-
-uint64_t bmw_large_expand1(uint8_t j, const uint64_t* q, const void* m){
-	uint64_t(*s[])(uint64_t) = {bmw_large_s1, bmw_large_s2, bmw_large_s3, bmw_large_s0};
-	union{
-		uint64_t v64;
-		uint32_t v32[2];
-	} r;
-	uint8_t i;
-	/* r = 0x0555555555555555LL*(j+16); */
-	r.v32[0] = pgm_read_dword(((uint8_t*)k_lut+8*j));
-	r.v32[1] = pgm_read_dword(((uint8_t*)k_lut+8*j+4));
-	for(i=0; i<16; ++i){
-		r.v64 += s[i%4](q[j+i]);
-	}
-	r.v64 += ((uint64_t*)m)[j];
-	r.v64 += ((uint64_t*)m)[j+3];
-	r.v64 -= ((uint64_t*)m)[j+10];
-	return r.v64;
-}
-
-uint64_t bmw_large_expand2(uint8_t j, const uint64_t* q, const void* m){
-	uint64_t(*rf[])(uint64_t) = {bmw_large_r1, bmw_large_r2, bmw_large_r3,
-	                             bmw_large_r4, bmw_large_r5, bmw_large_r6,
-							     bmw_large_r7};
-	union{
-		uint64_t v64;
-		uint32_t v32[2];
-	} r;
-	uint8_t i;
-	/* r = 0x0555555555555555LL*(j+16); */
-	r.v32[0] = pgm_read_dword(((uint8_t*)k_lut+8*j));
-	r.v32[1] = pgm_read_dword(((uint8_t*)k_lut+8*j+4));
-	for(i=0; i<14; i+=2){
-		r.v64 += q[j+i];
-	}
-	for(i=0; i<14; i+=2){
-		r.v64 += rf[i/2](q[j+i+1]);
-	}
-	r.v64 += bmw_large_s5(q[j+14]);
-	r.v64 += bmw_large_s4(q[j+15]);
-	r.v64 += ((uint64_t*)m)[j];
-	r.v64 += ((uint64_t*)m)[(j+3)%16];
-	r.v64 -= ((uint64_t*)m)[(j+10)%16];
-	return r.v64;
-}
-
-#if F0_HACK
-static
-uint8_t f0_lut[] PROGMEM ={
-	 5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
-	 6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
-	 0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
-	 0<<1, ( 1<<1)+1, ( 8<<1)+0, (10<<1)+1, (13<<1)+0,
-	 1<<1, ( 2<<1)+0, ( 9<<1)+0, (11<<1)+1, (14<<1)+1,
-	 3<<1, ( 2<<1)+1, (10<<1)+0, (12<<1)+1, (15<<1)+0,
-	 4<<1, ( 0<<1)+1, ( 3<<1)+1, (11<<1)+1, (13<<1)+0, 
-	 1<<1, ( 4<<1)+1, ( 5<<1)+1, (12<<1)+1, (14<<1)+1,
-	 2<<1, ( 5<<1)+1, ( 6<<1)+1, (13<<1)+0, (15<<1)+1,
-	 0<<1, ( 3<<1)+1, ( 6<<1)+0, ( 7<<1)+1, (14<<1)+0,
-	 8<<1, ( 1<<1)+1, ( 4<<1)+1, ( 7<<1)+1, (15<<1)+0,
-	 8<<1, ( 0<<1)+1, ( 2<<1)+1, ( 5<<1)+1, ( 9<<1)+0,
-	 1<<1, ( 3<<1)+0, ( 6<<1)+1, ( 9<<1)+1, (10<<1)+0,
-	 2<<1, ( 4<<1)+0, ( 7<<1)+0, (10<<1)+0, (11<<1)+0,
-	 3<<1, ( 5<<1)+1, ( 8<<1)+0, (11<<1)+1, (12<<1)+1,
-	12<<1, ( 4<<1)+1, ( 6<<1)+1, ( 9<<1)+1, (13<<1)+0
-};
-
-void bmw_large_f0(uint64_t* q, uint64_t* h, const void* m){
-	uint8_t i,j=-1,v,sign,l=0;
-	uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
-	                           bmw_large_s3, bmw_large_s4 };
-	for(i=0; i<16; ++i){
-		h[i] ^= ((uint64_t*)m)[i];
-	}
-	dump_x(h, 16, 'T');
-//	memset(q, 0, 4*16);
-	for(i=0; i<5*16; ++i){
-		v = pgm_read_byte(f0_lut+i);
-		sign = v&1;
-		v >>=1;
-		if(i==l){
-			j++;
-			l+=5;
-			q[j] = h[v];
-			continue;
-		}
-		if(sign){
-			q[j] -= h[v];
-		}else{
-			q[j] += h[v];
-		}
-	}
-	dump_x(q, 16, 'W');
-	for(i=0; i<16; ++i){
-		q[i] = s[i%5](q[i]);
-	}	
-}
-
-#else
-void bmw_large_f0(uint64_t* q, uint64_t* h, const void* m){
-	uint8_t i;
-	uint64_t(*s[])(uint64_t)={ bmw_large_s0, bmw_large_s1, bmw_large_s2,
-	                           bmw_large_s3, bmw_large_s4 };
-	for(i=0; i<16; ++i){
-		h[i] ^= ((uint64_t*)m)[i];
-	}
-	dump_x(t, 16, 'T');
-	q[ 0] = (h[ 5] - h[ 7] + h[10] + h[13] + h[14]);
-	q[ 1] = (h[ 6] - h[ 8] + h[11] + h[14] - h[15]);
-	q[ 2] = (h[ 0] + h[ 7] + h[ 9] - h[12] + h[15]);
-	q[ 3] = (h[ 0] - h[ 1] + h[ 8] - h[10] + h[13]);
-	q[ 4] = (h[ 1] + h[ 2] + h[ 9] - h[11] - h[14]);
-	q[ 5] = (h[ 3] - h[ 2] + h[10] - h[12] + h[15]);
-	q[ 6] = (h[ 4] - h[ 0] - h[ 3] - h[11] + h[13]); 
-	q[ 7] = (h[ 1] - h[ 4] - h[ 5] - h[12] - h[14]);
-	q[ 8] = (h[ 2] - h[ 5] - h[ 6] + h[13] - h[15]);
-	q[ 9] = (h[ 0] - h[ 3] + h[ 6] - h[ 7] + h[14]);
-	q[10] = (h[ 8] - h[ 1] - h[ 4] - h[ 7] + h[15]);
-	q[11] = (h[ 8] - h[ 0] - h[ 2] - h[ 5] + h[ 9]);
-	q[12] = (h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]);
-	q[13] = (h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]);
-	q[14] = (h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]);
-	q[15] = (h[12] - h[ 4] - h[ 6] - h[ 9] + h[13]); 
-	dump_x(q, 16, 'W');
-	for(i=0; i<16; ++i){
-		q[i] = s[i%5](q[i]);
-	}	
-}
-#endif
-
-void bmw_large_f1(uint64_t* q, const void* m){
-	uint8_t i;
-	q[16] = bmw_large_expand1(0, q, m);
-	q[17] = bmw_large_expand1(1, q, m);
-	for(i=2; i<16; ++i){
-		q[16+i] = bmw_large_expand2(i, q, m);
-	}
-}
-
-void bmw_large_f2(uint64_t* h, const uint64_t* q, const void* m){
-	uint64_t xl=0, xh;
-	uint8_t i;
-	for(i=16;i<24;++i){
-		xl ^= q[i];
-	}
-	xh = xl;
-	for(i=24;i<32;++i){
-		xh ^= q[i];
-	}
-#if DEBUG	
-	cli_putstr_P(PSTR("\r\n XL = "));
-	cli_hexdump_rev(&xl, 4);
-	cli_putstr_P(PSTR("\r\n XH = "));
-	cli_hexdump_rev(&xh, 4);
-#endif
-	memcpy(h, m, 16*8);
-	h[0] ^= SHL64(xh, 5) ^ SHR64(q[16], 5);
-	h[1] ^= SHR64(xh, 7) ^ SHL64(q[17], 8);
-	h[2] ^= SHR64(xh, 5) ^ SHL64(q[18], 5);
-	h[3] ^= SHR64(xh, 1) ^ SHL64(q[19], 5);
-	h[4] ^= SHR64(xh, 3) ^ q[20];
-	h[5] ^= SHL64(xh, 6) ^ SHR64(q[21], 6);
-	h[6] ^= SHR64(xh, 4) ^ SHL64(q[22], 6);
-	h[7] ^= SHR64(xh,11) ^ SHL64(q[23], 2);
-	for(i=0; i<8; ++i){
-		h[i] += xl ^ q[24+i] ^ q[i];
-	}
-	for(i=0; i<8; ++i){
-		h[8+i] ^= xh ^ q[24+i];
-		h[8+i] += ROTL64(h[(4+i)%8],i+9);
-	}
-	h[ 8] += SHL64(xl, 8) ^ q[23] ^ q[ 8];
-	h[ 9] += SHR64(xl, 6) ^ q[16] ^ q[ 9];
-	h[10] += SHL64(xl, 6) ^ q[17] ^ q[10];
-	h[11] += SHL64(xl, 4) ^ q[18] ^ q[11];
-	h[12] += SHR64(xl, 3) ^ q[19] ^ q[12];
-	h[13] += SHR64(xl, 4) ^ q[20] ^ q[13];
-	h[14] += SHR64(xl, 7) ^ q[21] ^ q[14];
-	h[15] += SHR64(xl, 2) ^ q[22] ^ q[15];
-}
-
-void bmw_large_nextBlock(bmw_large_ctx_t* ctx, const void* block){
-	uint64_t q[32];
-	dump_x(block, 16, 'M');
-	bmw_large_f0(q, ctx->h, block);
-	dump_x(q, 16, 'Q');
-	bmw_large_f1(q, block);
-	dump_x(q, 32, 'Q');
-	bmw_large_f2(ctx->h, q, block);
-	ctx->counter += 1;
-	ctx_dump(ctx);
-}
-
-void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b){
-	uint8_t buffer[128];
-	while(length_b >= BMW_LARGE_BLOCKSIZE){
-		bmw_large_nextBlock(ctx, block);
-		length_b -= BMW_LARGE_BLOCKSIZE;
-		block = (uint8_t*)block + BMW_LARGE_BLOCKSIZE_B;
-	}
-	memset(buffer, 0, 128);
-	memcpy(buffer, block, (length_b+7)/8);
-	buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
-	if(length_b+1>128*8-64){
-		bmw_large_nextBlock(ctx, buffer);
-		memset(buffer, 0, 128-8);
-		ctx->counter -= 1;
-	}
-	*((uint64_t*)&(buffer[128-8])) = (uint64_t)(ctx->counter*1024LL)+(uint64_t)length_b;
-	bmw_large_nextBlock(ctx, buffer);
-}
-
-void bmw384_init(bmw384_ctx_t* ctx){
-	uint8_t i;
-	ctx->h[0] = 0x0001020304050607LL;
-	for(i=1; i<16; ++i){
-		ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
-	}
-#if BUG24	
-	ctx->h[6] = 0x3031323324353637LL;
-#endif
-	ctx->counter=0;
-	ctx_dump(ctx);
-}
-
-void bmw512_init(bmw512_ctx_t* ctx){
-	uint8_t i;
-	ctx->h[0] = 0x8081828384858687LL;
-	for(i=1; i<16; ++i){
-		ctx->h[i] = ctx->h[i-1]+ 0x0808080808080808LL;
-	}
-	ctx->counter=0;
-	ctx_dump(ctx);
-}
-
-void bmw384_nextBlock(bmw384_ctx_t* ctx, const void* block){
-	bmw_large_nextBlock(ctx, block);
-}
-
-void bmw512_nextBlock(bmw512_ctx_t* ctx, const void* block){
-	bmw_large_nextBlock(ctx, block);
-}
-
-void bmw384_lastBlock(bmw384_ctx_t* ctx, const void* block, uint16_t length_b){
-	bmw_large_lastBlock(ctx, block, length_b);
-}
-
-void bmw512_lastBlock(bmw512_ctx_t* ctx, const void* block, uint16_t length_b){
-	bmw_large_lastBlock(ctx, block, length_b);
-}
-
-void bmw384_ctx2hash(void* dest, const bmw384_ctx_t* ctx){
-	memcpy(dest, &(ctx->h[10]), 384/8);
-}
-
-void bmw512_ctx2hash(void* dest, const bmw512_ctx_t* ctx){
-	memcpy(dest, &(ctx->h[8]), 512/8);
-}
-
-void bmw384(void* dest, const void* msg, uint32_t length_b){
-	bmw_large_ctx_t ctx;
-	bmw384_init(&ctx);
-	while(length_b>=BMW_LARGE_BLOCKSIZE){
-		bmw_large_nextBlock(&ctx, msg);
-		length_b -= BMW_LARGE_BLOCKSIZE;
-		msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
-	}
-	bmw_large_lastBlock(&ctx, msg, length_b);
-	bmw384_ctx2hash(dest, &ctx);
-}
-
-void bmw512(void* dest, const void* msg, uint32_t length_b){
-	bmw_large_ctx_t ctx;
-	bmw512_init(&ctx);
-	while(length_b>=BMW_LARGE_BLOCKSIZE){
-		bmw_large_nextBlock(&ctx, msg);
-		length_b -= BMW_LARGE_BLOCKSIZE;
-		msg = (uint8_t*)msg + BMW_LARGE_BLOCKSIZE_B;
-	}
-	bmw_large_lastBlock(&ctx, msg, length_b);
-	bmw512_ctx2hash(dest, &ctx);
-}
-
diff --git a/bmw_large.h b/bmw_large.h
deleted file mode 100644
index fba01fb..0000000
--- a/bmw_large.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* bmw_large.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * \file    bmw_large.h
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \date    2009-04-27
- * \license GPLv3 or later
- * 
- */
-#ifndef BMW_LARGE_H_
-#define BMW_LARGE_H_
-
-#include <stdint.h>
-
-#define BMW_LARGE_BLOCKSIZE   1024
-#define BMW_LARGE_BLOCKSIZE_B ((BMW_LARGE_BLOCKSIZE+7)/8)
-#define BMW384_BLOCKSIZE      BMW_LARGE_BLOCKSIZE
-#define BMW384_BLOCKSIZE_B    BMW_LARGE_BLOCKSIZE_B
-#define BMW512_BLOCKSIZE      BMW_LARGE_BLOCKSIZE
-#define BMW512_BLOCKSIZE_B    BMW_LARGE_BLOCKSIZE_B
-
-typedef struct {
-	uint64_t h[16];
-	uint32_t counter;
-} bmw_large_ctx_t;
-
-typedef bmw_large_ctx_t bmw384_ctx_t;
-typedef bmw_large_ctx_t bmw512_ctx_t;
-
-void bmw384_init(bmw384_ctx_t* ctx);
-void bmw512_init(bmw512_ctx_t* ctx);
-
-void bmw_large_nextBlock(bmw_large_ctx_t* ctx, const void* block);
-void bmw_large_lastBlock(bmw_large_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw384_nextBlock(bmw384_ctx_t* ctx, const void* block);
-void bmw384_lastBlock(bmw384_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw512_nextBlock(bmw512_ctx_t* ctx, const void* block);
-void bmw512_lastBlock(bmw512_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw384_ctx2hash(void* dest, const bmw384_ctx_t* ctx);
-void bmw512_ctx2hash(void* dest, const bmw512_ctx_t* ctx);
-
-void bmw384(void* dest, const void* msg, uint32_t length_b);
-void bmw512(void* dest, const void* msg, uint32_t length_b);
-
-#endif /* BMW_LARGE_H_ */
diff --git a/bmw_small.c b/bmw_small.c
deleted file mode 100644
index aefd592..0000000
--- a/bmw_small.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/* bmw_small.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * \file    bmw_small.c
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \date    2009-04-27
- * \license GPLv3 or later
- * 
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "bmw_small.h"
-
-
-#define SHL32(a,n) ((a)<<(n))
-#define SHR32(a,n) ((a)>>(n))
-#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n))))
-#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n))))
-
-#define BUG24   0
-#define F0_HACK 1
-
-#define DEBUG 0
-#if DEBUG
- #include "cli.h"
- 
- void ctx_dump(const bmw_small_ctx_t* ctx){
- 	uint8_t i;
-	cli_putstr_P(PSTR("\r\n==== ctx dump ===="));
-	for(i=0; i<16;++i){
-		cli_putstr_P(PSTR("\r\n h["));
-		cli_hexdump(&i, 1);
-		cli_putstr_P(PSTR("] = "));
-		cli_hexdump_rev(&(ctx->h[i]), 4);
-	}
-	cli_putstr_P(PSTR("\r\n counter = "));
-	cli_hexdump(&(ctx->counter), 4);
- }
- 
- void dump_x(const uint32_t* q, uint8_t elements, char x){
-	uint8_t i;
- 	cli_putstr_P(PSTR("\r\n==== "));
-	cli_putc(x);
-	cli_putstr_P(PSTR(" dump ===="));
-	for(i=0; i<elements;++i){
-		cli_putstr_P(PSTR("\r\n "));
-		cli_putc(x);
-		cli_putstr_P(PSTR("["));
-		cli_hexdump(&i, 1);
-		cli_putstr_P(PSTR("] = "));
-		cli_hexdump_rev(&(q[i]), 4);
-	}
- }
-#else
- #define ctx_dump(x)
- #define dump_x(a,b,c)
-#endif
-
-uint32_t bmw_small_s0(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 1)
-		^ SHL32(x, 3)
-		^ ROTL32(x, 4)
-		^ ROTR32(x, 13);
-	return r;	
-}
-
-uint32_t bmw_small_s1(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 1)
-		^ SHL32(x, 2)
-		^ ROTL32(x, 8)
-		^ ROTR32(x, 9);
-	return r;	
-}
-
-uint32_t bmw_small_s2(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 2)
-		^ SHL32(x, 1)
-		^ ROTL32(x, 12)
-		^ ROTR32(x, 7);
-	return r;	
-}
-
-uint32_t bmw_small_s3(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 2)
-		^ SHL32(x, 2)
-		^ ROTL32(x, 15)
-		^ ROTR32(x, 3);
-	return r;	
-}
-
-uint32_t bmw_small_s4(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 1)
-		^ x;
-	return r;	
-}
-
-uint32_t bmw_small_s5(uint32_t x){
-	uint32_t r;
-	r =   SHR32(x, 2)
-		^ x;
-	return r;	
-}
-
-uint32_t bmw_small_r1(uint32_t x){
-	uint32_t r;
-	r =   ROTL32(x, 3);
-	return r;	
-}
-
-uint32_t bmw_small_r2(uint32_t x){
-	uint32_t r;
-	r =   ROTL32(x, 7);
-	return r;	
-}
-
-uint32_t bmw_small_r3(uint32_t x){
-	uint32_t r;
-	r =   ROTL32(x, 13);
-	return r;	
-}
-
-uint32_t bmw_small_r4(uint32_t x){
-	uint32_t r;
-	r =   ROTL32(x, 16);
-	return r;	
-}
-
-uint32_t bmw_small_r5(uint32_t x){
-	uint32_t r;
-	r =   ROTR32(x, 13);
-	return r;	
-}
-
-uint32_t bmw_small_r6(uint32_t x){
-	uint32_t r;
-	r =   ROTR32(x, 9);
-	return r;	
-}
-
-uint32_t bmw_small_r7(uint32_t x){
-	uint32_t r;
-	r =   ROTR32(x, 5);
-	return r;	
-}
-/*
-#define K 0x05555555L
-static
-uint32_t k_lut[] PROGMEM = {
-	16L*K, 17L*K, 18L*K, 19L*K, 20L*K, 21L*K, 22L*K, 23L*K,
-	24L*K, 25L*K, 26L*K, 27L*K, 28L*K, 29L*K, 30L*K, 31L*K
-};
-*/
-/* same as above but precomputed to avoid compiler warnings */
-
-static
-uint32_t k_lut[] PROGMEM = {
-	0x55555550L, 0x5aaaaaa5L, 0x5ffffffaL, 
-	0x6555554fL, 0x6aaaaaa4L, 0x6ffffff9L, 
-	0x7555554eL, 0x7aaaaaa3L, 0x7ffffff8L, 
-	0x8555554dL, 0x8aaaaaa2L, 0x8ffffff7L,
-	0x9555554cL, 0x9aaaaaa1L, 0x9ffffff6L, 
-	0xa555554bL };
-
-
-uint32_t bmw_small_expand1(uint8_t j, const uint32_t* q, const void* m){
-	uint32_t(*s[])(uint32_t) = {bmw_small_s1, bmw_small_s2, bmw_small_s3, bmw_small_s0};
-	uint32_t r;
-	uint8_t i;
-	/* r = 0x05555555*(j+16); */
-	r = pgm_read_dword(k_lut+j);
-	for(i=0; i<16; ++i){
-		r += s[i%4](q[j+i]);
-	}
-	r += ((uint32_t*)m)[j];
-	r += ((uint32_t*)m)[j+3];
-	r -= ((uint32_t*)m)[j+10];
-	return r;
-}
-
-uint32_t bmw_small_expand2(uint8_t j, const uint32_t* q, const void* m){
-	uint32_t(*rf[])(uint32_t) = {bmw_small_r1, bmw_small_r2, bmw_small_r3,
-	                             bmw_small_r4, bmw_small_r5, bmw_small_r6,
-							     bmw_small_r7};
-	uint32_t r;
-	uint8_t i;
-	/* r = 0x05555555*(j+16); */
-	r = pgm_read_dword(k_lut+j);
-	for(i=0; i<14; i+=2){
-		r += q[j+i];
-	}
-	for(i=0; i<14; i+=2){
-		r += rf[i/2](q[j+i+1]);
-	}
-	r += bmw_small_s5(q[j+14]);
-	r += bmw_small_s4(q[j+15]);
-	r += ((uint32_t*)m)[j];
-	r += ((uint32_t*)m)[(j+3)%16];
-	r -= ((uint32_t*)m)[(j+10)%16];
-	return r;
-}
-#if F0_HACK
-static
-uint8_t f0_lut[] PROGMEM = {
-	 5<<1, ( 7<<1)+1, (10<<1)+0, (13<<1)+0, (14<<1)+0,
-	 6<<1, ( 8<<1)+1, (11<<1)+0, (14<<1)+0, (15<<1)+1,
-	 0<<1, ( 7<<1)+0, ( 9<<1)+0, (12<<1)+1, (15<<1)+0,
-	 0<<1, ( 1<<1)+1, ( 8<<1)+0, (10<<1)+1, (13<<1)+0,
-	 1<<1, ( 2<<1)+0, ( 9<<1)+0, (11<<1)+1, (14<<1)+1,
-	 3<<1, ( 2<<1)+1, (10<<1)+0, (12<<1)+1, (15<<1)+0,
-	 4<<1, ( 0<<1)+1, ( 3<<1)+1, (11<<1)+1, (13<<1)+0, 
-	 1<<1, ( 4<<1)+1, ( 5<<1)+1, (12<<1)+1, (14<<1)+1,
-	 2<<1, ( 5<<1)+1, ( 6<<1)+1, (13<<1)+0, (15<<1)+1,
-	 0<<1, ( 3<<1)+1, ( 6<<1)+0, ( 7<<1)+1, (14<<1)+0,
-	 8<<1, ( 1<<1)+1, ( 4<<1)+1, ( 7<<1)+1, (15<<1)+0,
-	 8<<1, ( 0<<1)+1, ( 2<<1)+1, ( 5<<1)+1, ( 9<<1)+0,
-	 1<<1, ( 3<<1)+0, ( 6<<1)+1, ( 9<<1)+1, (10<<1)+0,
-	 2<<1, ( 4<<1)+0, ( 7<<1)+0, (10<<1)+0, (11<<1)+0,
-	 3<<1, ( 5<<1)+1, ( 8<<1)+0, (11<<1)+1, (12<<1)+1,
-	12<<1, ( 4<<1)+1, ( 6<<1)+1, ( 9<<1)+1, (13<<1)+0
-};
-
-void bmw_small_f0(uint32_t* q, uint32_t* h, const void* m){
-	uint8_t i,j=-1,v,sign,l=0;
-	uint32_t(*s[])(uint32_t)={ bmw_small_s0, bmw_small_s1, bmw_small_s2,
-	                           bmw_small_s3, bmw_small_s4 };
-	for(i=0; i<16; ++i){
-		h[i] ^= ((uint32_t*)m)[i];
-	}
-	dump_x(h, 16, 'T');
-	// memset(q, 0, 4*16);
-	for(i=0; i<5*16; ++i){
-		v = pgm_read_byte(f0_lut+i);
-		sign = v&1;
-		v >>=1;
-		if(i==l){
-			j++;
-			l+=5;
-			q[j] = h[v];
-			continue;
-		}
-		if(sign){
-			q[j] -= h[v];
-		}else{
-			q[j] += h[v];
-		}
-	}
-	dump_x(q, 16, 'W');
-	for(i=0; i<16; ++i){
-		q[i] = s[i%5](q[i]);
-	}	
-}
-
-#else
-void bmw_small_f0(uint32_t* q, uint32_t* h, const void* m){
-	uint8_t i;
-	uint32_t(*s[])(uint32_t)={ bmw_small_s0, bmw_small_s1, bmw_small_s2,
-	                           bmw_small_s3, bmw_small_s4 };
-	for(i=0; i<16; ++i){
-		h[i] ^= ((uint32_t*)m)[i];
-	}
-	dump_x(h, 16, 'T');
-	q[ 0] = (h[ 5] - h[ 7] + h[10] + h[13] + h[14]);
-	q[ 1] = (h[ 6] - h[ 8] + h[11] + h[14] - h[15]);
-	q[ 2] = (h[ 0] + h[ 7] + h[ 9] - h[12] + h[15]);
-	q[ 3] = (h[ 0] - h[ 1] + h[ 8] - h[10] + h[13]);
-	q[ 4] = (h[ 1] + h[ 2] + h[ 9] - h[11] - h[14]);
-	q[ 5] = (h[ 3] - h[ 2] + h[10] - h[12] + h[15]);
-	q[ 6] = (h[ 4] - h[ 0] - h[ 3] - h[11] + h[13]); 
-	q[ 7] = (h[ 1] - h[ 4] - h[ 5] - h[12] - h[14]);
-	q[ 8] = (h[ 2] - h[ 5] - h[ 6] + h[13] - h[15]);
-	q[ 9] = (h[ 0] - h[ 3] + h[ 6] - h[ 7] + h[14]);
-	q[10] = (h[ 8] - h[ 1] - h[ 4] - h[ 7] + h[15]);
-	q[11] = (h[ 8] - h[ 0] - h[ 2] - h[ 5] + h[ 9]);
-	q[12] = (h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]);
-	q[13] = (h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]);
-	q[14] = (h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]);
-	q[15] = (h[12] - h[ 4] - h[ 6] - h[ 9] + h[13]); 
-	dump_x(q, 16, 'W');
-	for(i=0; i<16; ++i){
-		q[i] = s[i%5](q[i]);
-	}	
-}
-#endif
-
-void bmw_small_f1(uint32_t* q, const void* m){
-	uint8_t i;
-	q[16] = bmw_small_expand1(0, q, m);
-	q[17] = bmw_small_expand1(1, q, m);
-	for(i=2; i<16; ++i){
-		q[16+i] = bmw_small_expand2(i, q, m);
-	}
-}
-
-void bmw_small_f2(uint32_t* h, const uint32_t* q, const void* m){
-	uint32_t xl=0, xh;
-	uint8_t i;
-	for(i=16;i<24;++i){
-		xl ^= q[i];
-	}
-	xh = xl;
-	for(i=24;i<32;++i){
-		xh ^= q[i];
-	}
-#if DEBUG	
-	cli_putstr_P(PSTR("\r\n XL = "));
-	cli_hexdump_rev(&xl, 4);
-	cli_putstr_P(PSTR("\r\n XH = "));
-	cli_hexdump_rev(&xh, 4);
-#endif
-	memcpy(h, m, 16*4);
-	h[0] ^= SHL32(xh, 5) ^ SHR32(q[16], 5);
-	h[1] ^= SHR32(xh, 7) ^ SHL32(q[17], 8);
-	h[2] ^= SHR32(xh, 5) ^ SHL32(q[18], 5);
-	h[3] ^= SHR32(xh, 1) ^ SHL32(q[19], 5);
-	h[4] ^= SHR32(xh, 3) ^ q[20];
-	h[5] ^= SHL32(xh, 6) ^ SHR32(q[21], 6);
-	h[6] ^= SHR32(xh, 4) ^ SHL32(q[22], 6);
-	h[7] ^= SHR32(xh,11) ^ SHL32(q[23], 2);
-	for(i=0; i<8; ++i){
-		h[i] += xl ^ q[24+i] ^ q[i];
-	}
-	for(i=0; i<8; ++i){
-		h[8+i] ^= xh ^ q[24+i];
-		h[8+i] += ROTL32(h[(4+i)%8],i+9);
-	}
-	h[ 8] += SHL32(xl, 8) ^ q[23] ^ q[ 8];
-	h[ 9] += SHR32(xl, 6) ^ q[16] ^ q[ 9];
-	h[10] += SHL32(xl, 6) ^ q[17] ^ q[10];
-	h[11] += SHL32(xl, 4) ^ q[18] ^ q[11];
-	h[12] += SHR32(xl, 3) ^ q[19] ^ q[12];
-	h[13] += SHR32(xl, 4) ^ q[20] ^ q[13];
-	h[14] += SHR32(xl, 7) ^ q[21] ^ q[14];
-	h[15] += SHR32(xl, 2) ^ q[22] ^ q[15];
-}
-
-void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block){
-	uint32_t q[32];
-	dump_x(block, 16, 'M');
-	bmw_small_f0(q, ctx->h, block);
-	dump_x(q, 16, 'Q');
-	bmw_small_f1(q, block);
-	dump_x(q, 32, 'Q');
-	bmw_small_f2(ctx->h, q, block);
-	ctx->counter += 1;
-	ctx_dump(ctx);
-}
-
-void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b){
-	uint8_t buffer[64];
-	while(length_b >= BMW_SMALL_BLOCKSIZE){
-		bmw_small_nextBlock(ctx, block);
-		length_b -= BMW_SMALL_BLOCKSIZE;
-		block = (uint8_t*)block + BMW_SMALL_BLOCKSIZE_B;
-	}
-	memset(buffer, 0, 64);
-	memcpy(buffer, block, (length_b+7)/8);
-	buffer[length_b>>3] |= 0x80 >> (length_b&0x07);
-	if(length_b+1>64*8-64){
-		bmw_small_nextBlock(ctx, buffer);
-		memset(buffer, 0, 64-8);
-		ctx->counter -= 1;
-	}
-	*((uint64_t*)&(buffer[64-8])) = (uint64_t)(ctx->counter*512LL)+(uint64_t)length_b;
-	bmw_small_nextBlock(ctx, buffer);
-}
-
-void bmw224_init(bmw224_ctx_t* ctx){
-	uint8_t i;
-	ctx->h[0] = 0x00010203;
-	for(i=1; i<16; ++i){
-		ctx->h[i] = ctx->h[i-1]+ 0x04040404;
-	}
-#if BUG24	
-	ctx->h[13] = 0x24353637;
-#endif
-	ctx->counter=0;
-	ctx_dump(ctx);
-}
-
-void bmw256_init(bmw256_ctx_t* ctx){
-	uint8_t i;
-	ctx->h[0] = 0x40414243;
-	for(i=1; i<16; ++i){
-		ctx->h[i] = ctx->h[i-1]+ 0x04040404;
-	}
-	ctx->counter=0;
-	ctx_dump(ctx);
-}
-
-void bmw224_nextBlock(bmw224_ctx_t* ctx, const void* block){
-	bmw_small_nextBlock(ctx, block);
-}
-
-void bmw256_nextBlock(bmw256_ctx_t* ctx, const void* block){
-	bmw_small_nextBlock(ctx, block);
-}
-
-void bmw224_lastBlock(bmw224_ctx_t* ctx, const void* block, uint16_t length_b){
-	bmw_small_lastBlock(ctx, block, length_b);
-}
-
-void bmw256_lastBlock(bmw256_ctx_t* ctx, const void* block, uint16_t length_b){
-	bmw_small_lastBlock(ctx, block, length_b);
-}
-
-void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx){
-	memcpy(dest, &(ctx->h[9]), 224/8);
-}
-
-void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx){
-	memcpy(dest, &(ctx->h[8]), 256/8);
-}
-
-void bmw224(void* dest, const void* msg, uint32_t length_b){
-	bmw_small_ctx_t ctx;
-	bmw224_init(&ctx);
-	while(length_b>=BMW_SMALL_BLOCKSIZE){
-		bmw_small_nextBlock(&ctx, msg);
-		length_b -= BMW_SMALL_BLOCKSIZE;
-		msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
-	}
-	bmw_small_lastBlock(&ctx, msg, length_b);
-	bmw224_ctx2hash(dest, &ctx);
-}
-
-void bmw256(void* dest, const void* msg, uint32_t length_b){
-	bmw_small_ctx_t ctx;
-	bmw256_init(&ctx);
-	while(length_b>=BMW_SMALL_BLOCKSIZE){
-		bmw_small_nextBlock(&ctx, msg);
-		length_b -= BMW_SMALL_BLOCKSIZE;
-		msg = (uint8_t*)msg + BMW_SMALL_BLOCKSIZE_B;
-	}
-	bmw_small_lastBlock(&ctx, msg, length_b);
-	bmw256_ctx2hash(dest, &ctx);
-}
-
diff --git a/bmw_small.h b/bmw_small.h
deleted file mode 100644
index f314712..0000000
--- a/bmw_small.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* bmw_small.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * \file    bmw_small.h
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \date    2009-04-27
- * \license GPLv3 or later
- * 
- */
-#ifndef BMW_SMALL_H_
-#define BMW_SMALL_H_
-
-#include <stdint.h>
-
-#define BMW_SMALL_BLOCKSIZE   512
-#define BMW_SMALL_BLOCKSIZE_B ((BMW_SMALL_BLOCKSIZE+7)/8)
-#define BMW224_BLOCKSIZE      BMW_SMALL_BLOCKSIZE
-#define BMW224_BLOCKSIZE_B    BMW_SMALL_BLOCKSIZE_B
-#define BMW256_BLOCKSIZE      BMW_SMALL_BLOCKSIZE
-#define BMW256_BLOCKSIZE_B    BMW_SMALL_BLOCKSIZE_B
-
-typedef struct {
-	uint32_t h[16];
-	uint32_t counter;
-} bmw_small_ctx_t;
-
-typedef bmw_small_ctx_t bmw224_ctx_t;
-typedef bmw_small_ctx_t bmw256_ctx_t;
-
-void bmw224_init(bmw224_ctx_t* ctx);
-void bmw256_init(bmw256_ctx_t* ctx);
-
-void bmw_small_nextBlock(bmw_small_ctx_t* ctx, const void* block);
-void bmw_small_lastBlock(bmw_small_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw224_nextBlock(bmw224_ctx_t* ctx, const void* block);
-void bmw224_lastBlock(bmw224_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw256_nextBlock(bmw256_ctx_t* ctx, const void* block);
-void bmw256_lastBlock(bmw256_ctx_t* ctx, const void* block, uint16_t length_b);
-
-void bmw224_ctx2hash(void* dest, const bmw224_ctx_t* ctx);
-void bmw256_ctx2hash(void* dest, const bmw256_ctx_t* ctx);
-
-void bmw224(void* dest, const void* msg, uint32_t length_b);
-void bmw256(void* dest, const void* msg, uint32_t length_b);
-
-#endif /* BMW_SMALL_H_ */
diff --git a/cast5-sbox.h b/cast5-sbox.h
deleted file mode 100644
index 4a8b6bf..0000000
--- a/cast5-sbox.h
+++ /dev/null
@@ -1,601 +0,0 @@
-/* cast5-sbox.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * File:	cast5-sbox.h
- * Author:	Daniel Otte
- * Date: 	26.07.2006
- * License: GPL
- * Description: sboxes for CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144.
- * 
- */
- 
-#ifndef CAST5_SBOX_H_
-#define CAST5_SBOX_H_
-
-#include <avr/pgmspace.h>
-#include <stdint.h>
-
-#ifndef BIG_ENDIAN
-
-uint32_t s1[] PROGMEM = {
-0x30fb40d4UL, 0x9fa0ff0bUL, 0x6beccd2fUL, 0x3f258c7aUL, 0x1e213f2fUL, 0x9c004dd3UL, 0x6003e540UL, 0xcf9fc949UL,
-0xbfd4af27UL, 0x88bbbdb5UL, 0xe2034090UL, 0x98d09675UL, 0x6e63a0e0UL, 0x15c361d2UL, 0xc2e7661dUL, 0x22d4ff8eUL,
-0x28683b6fUL, 0xc07fd059UL, 0xff2379c8UL, 0x775f50e2UL, 0x43c340d3UL, 0xdf2f8656UL, 0x887ca41aUL, 0xa2d2bd2dUL,
-0xa1c9e0d6UL, 0x346c4819UL, 0x61b76d87UL, 0x22540f2fUL, 0x2abe32e1UL, 0xaa54166bUL, 0x22568e3aUL, 0xa2d341d0UL,
-0x66db40c8UL, 0xa784392fUL, 0x004dff2fUL, 0x2db9d2deUL, 0x97943facUL, 0x4a97c1d8UL, 0x527644b7UL, 0xb5f437a7UL,
-0xb82cbaefUL, 0xd751d159UL, 0x6ff7f0edUL, 0x5a097a1fUL, 0x827b68d0UL, 0x90ecf52eUL, 0x22b0c054UL, 0xbc8e5935UL,
-0x4b6d2f7fUL, 0x50bb64a2UL, 0xd2664910UL, 0xbee5812dUL, 0xb7332290UL, 0xe93b159fUL, 0xb48ee411UL, 0x4bff345dUL,
-0xfd45c240UL, 0xad31973fUL, 0xc4f6d02eUL, 0x55fc8165UL, 0xd5b1caadUL, 0xa1ac2daeUL, 0xa2d4b76dUL, 0xc19b0c50UL,
-0x882240f2UL, 0x0c6e4f38UL, 0xa4e4bfd7UL, 0x4f5ba272UL, 0x564c1d2fUL, 0xc59c5319UL, 0xb949e354UL, 0xb04669feUL,
-0xb1b6ab8aUL, 0xc71358ddUL, 0x6385c545UL, 0x110f935dUL, 0x57538ad5UL, 0x6a390493UL, 0xe63d37e0UL, 0x2a54f6b3UL,
-0x3a787d5fUL, 0x6276a0b5UL, 0x19a6fcdfUL, 0x7a42206aUL, 0x29f9d4d5UL, 0xf61b1891UL, 0xbb72275eUL, 0xaa508167UL,
-0x38901091UL, 0xc6b505ebUL, 0x84c7cb8cUL, 0x2ad75a0fUL, 0x874a1427UL, 0xa2d1936bUL, 0x2ad286afUL, 0xaa56d291UL,
-0xd7894360UL, 0x425c750dUL, 0x93b39e26UL, 0x187184c9UL, 0x6c00b32dUL, 0x73e2bb14UL, 0xa0bebc3cUL, 0x54623779UL,
-0x64459eabUL, 0x3f328b82UL, 0x7718cf82UL, 0x59a2cea6UL, 0x04ee002eUL, 0x89fe78e6UL, 0x3fab0950UL, 0x325ff6c2UL,
-0x81383f05UL, 0x6963c5c8UL, 0x76cb5ad6UL, 0xd49974c9UL, 0xca180dcfUL, 0x380782d5UL, 0xc7fa5cf6UL, 0x8ac31511UL,
-0x35e79e13UL, 0x47da91d0UL, 0xf40f9086UL, 0xa7e2419eUL, 0x31366241UL, 0x051ef495UL, 0xaa573b04UL, 0x4a805d8dUL,
-0x548300d0UL, 0x00322a3cUL, 0xbf64cddfUL, 0xba57a68eUL, 0x75c6372bUL, 0x50afd341UL, 0xa7c13275UL, 0x915a0bf5UL,
-0x6b54bfabUL, 0x2b0b1426UL, 0xab4cc9d7UL, 0x449ccd82UL, 0xf7fbf265UL, 0xab85c5f3UL, 0x1b55db94UL, 0xaad4e324UL,
-0xcfa4bd3fUL, 0x2deaa3e2UL, 0x9e204d02UL, 0xc8bd25acUL, 0xeadf55b3UL, 0xd5bd9e98UL, 0xe31231b2UL, 0x2ad5ad6cUL,
-0x954329deUL, 0xadbe4528UL, 0xd8710f69UL, 0xaa51c90fUL, 0xaa786bf6UL, 0x22513f1eUL, 0xaa51a79bUL, 0x2ad344ccUL,
-0x7b5a41f0UL, 0xd37cfbadUL, 0x1b069505UL, 0x41ece491UL, 0xb4c332e6UL, 0x032268d4UL, 0xc9600accUL, 0xce387e6dUL,
-0xbf6bb16cUL, 0x6a70fb78UL, 0x0d03d9c9UL, 0xd4df39deUL, 0xe01063daUL, 0x4736f464UL, 0x5ad328d8UL, 0xb347cc96UL,
-0x75bb0fc3UL, 0x98511bfbUL, 0x4ffbcc35UL, 0xb58bcf6aUL, 0xe11f0abcUL, 0xbfc5fe4aUL, 0xa70aec10UL, 0xac39570aUL,
-0x3f04442fUL, 0x6188b153UL, 0xe0397a2eUL, 0x5727cb79UL, 0x9ceb418fUL, 0x1cacd68dUL, 0x2ad37c96UL, 0x0175cb9dUL,
-0xc69dff09UL, 0xc75b65f0UL, 0xd9db40d8UL, 0xec0e7779UL, 0x4744ead4UL, 0xb11c3274UL, 0xdd24cb9eUL, 0x7e1c54bdUL,
-0xf01144f9UL, 0xd2240eb1UL, 0x9675b3fdUL, 0xa3ac3755UL, 0xd47c27afUL, 0x51c85f4dUL, 0x56907596UL, 0xa5bb15e6UL,
-0x580304f0UL, 0xca042cf1UL, 0x011a37eaUL, 0x8dbfaadbUL, 0x35ba3e4aUL, 0x3526ffa0UL, 0xc37b4d09UL, 0xbc306ed9UL,
-0x98a52666UL, 0x5648f725UL, 0xff5e569dUL, 0x0ced63d0UL, 0x7c63b2cfUL, 0x700b45e1UL, 0xd5ea50f1UL, 0x85a92872UL,
-0xaf1fbda7UL, 0xd4234870UL, 0xa7870bf3UL, 0x2d3b4d79UL, 0x42e04198UL, 0x0cd0ede7UL, 0x26470db8UL, 0xf881814cUL,
-0x474d6ad7UL, 0x7c0c5e5cUL, 0xd1231959UL, 0x381b7298UL, 0xf5d2f4dbUL, 0xab838653UL, 0x6e2f1e23UL, 0x83719c9eUL,
-0xbd91e046UL, 0x9a56456eUL, 0xdc39200cUL, 0x20c8c571UL, 0x962bda1cUL, 0xe1e696ffUL, 0xb141ab08UL, 0x7cca89b9UL,
-0x1a69e783UL, 0x02cc4843UL, 0xa2f7c579UL, 0x429ef47dUL, 0x427b169cUL, 0x5ac9f049UL, 0xdd8f0f00UL, 0x5c8165bfUL};
-
-uint32_t s2[] PROGMEM = {
-0x1f201094UL, 0xef0ba75bUL, 0x69e3cf7eUL, 0x393f4380UL, 0xfe61cf7aUL, 0xeec5207aUL, 0x55889c94UL, 0x72fc0651UL,
-0xada7ef79UL, 0x4e1d7235UL, 0xd55a63ceUL, 0xde0436baUL, 0x99c430efUL, 0x5f0c0794UL, 0x18dcdb7dUL, 0xa1d6eff3UL,
-0xa0b52f7bUL, 0x59e83605UL, 0xee15b094UL, 0xe9ffd909UL, 0xdc440086UL, 0xef944459UL, 0xba83ccb3UL, 0xe0c3cdfbUL,
-0xd1da4181UL, 0x3b092ab1UL, 0xf997f1c1UL, 0xa5e6cf7bUL, 0x01420ddbUL, 0xe4e7ef5bUL, 0x25a1ff41UL, 0xe180f806UL,
-0x1fc41080UL, 0x179bee7aUL, 0xd37ac6a9UL, 0xfe5830a4UL, 0x98de8b7fUL, 0x77e83f4eUL, 0x79929269UL, 0x24fa9f7bUL,
-0xe113c85bUL, 0xacc40083UL, 0xd7503525UL, 0xf7ea615fUL, 0x62143154UL, 0x0d554b63UL, 0x5d681121UL, 0xc866c359UL,
-0x3d63cf73UL, 0xcee234c0UL, 0xd4d87e87UL, 0x5c672b21UL, 0x071f6181UL, 0x39f7627fUL, 0x361e3084UL, 0xe4eb573bUL,
-0x602f64a4UL, 0xd63acd9cUL, 0x1bbc4635UL, 0x9e81032dUL, 0x2701f50cUL, 0x99847ab4UL, 0xa0e3df79UL, 0xba6cf38cUL,
-0x10843094UL, 0x2537a95eUL, 0xf46f6ffeUL, 0xa1ff3b1fUL, 0x208cfb6aUL, 0x8f458c74UL, 0xd9e0a227UL, 0x4ec73a34UL,
-0xfc884f69UL, 0x3e4de8dfUL, 0xef0e0088UL, 0x3559648dUL, 0x8a45388cUL, 0x1d804366UL, 0x721d9bfdUL, 0xa58684bbUL,
-0xe8256333UL, 0x844e8212UL, 0x128d8098UL, 0xfed33fb4UL, 0xce280ae1UL, 0x27e19ba5UL, 0xd5a6c252UL, 0xe49754bdUL,
-0xc5d655ddUL, 0xeb667064UL, 0x77840b4dUL, 0xa1b6a801UL, 0x84db26a9UL, 0xe0b56714UL, 0x21f043b7UL, 0xe5d05860UL,
-0x54f03084UL, 0x066ff472UL, 0xa31aa153UL, 0xdadc4755UL, 0xb5625dbfUL, 0x68561be6UL, 0x83ca6b94UL, 0x2d6ed23bUL,
-0xeccf01dbUL, 0xa6d3d0baUL, 0xb6803d5cUL, 0xaf77a709UL, 0x33b4a34cUL, 0x397bc8d6UL, 0x5ee22b95UL, 0x5f0e5304UL,
-0x81ed6f61UL, 0x20e74364UL, 0xb45e1378UL, 0xde18639bUL, 0x881ca122UL, 0xb96726d1UL, 0x8049a7e8UL, 0x22b7da7bUL,
-0x5e552d25UL, 0x5272d237UL, 0x79d2951cUL, 0xc60d894cUL, 0x488cb402UL, 0x1ba4fe5bUL, 0xa4b09f6bUL, 0x1ca815cfUL,
-0xa20c3005UL, 0x8871df63UL, 0xb9de2fcbUL, 0x0cc6c9e9UL, 0x0beeff53UL, 0xe3214517UL, 0xb4542835UL, 0x9f63293cUL,
-0xee41e729UL, 0x6e1d2d7cUL, 0x50045286UL, 0x1e6685f3UL, 0xf33401c6UL, 0x30a22c95UL, 0x31a70850UL, 0x60930f13UL,
-0x73f98417UL, 0xa1269859UL, 0xec645c44UL, 0x52c877a9UL, 0xcdff33a6UL, 0xa02b1741UL, 0x7cbad9a2UL, 0x2180036fUL,
-0x50d99c08UL, 0xcb3f4861UL, 0xc26bd765UL, 0x64a3f6abUL, 0x80342676UL, 0x25a75e7bUL, 0xe4e6d1fcUL, 0x20c710e6UL,
-0xcdf0b680UL, 0x17844d3bUL, 0x31eef84dUL, 0x7e0824e4UL, 0x2ccb49ebUL, 0x846a3baeUL, 0x8ff77888UL, 0xee5d60f6UL,
-0x7af75673UL, 0x2fdd5cdbUL, 0xa11631c1UL, 0x30f66f43UL, 0xb3faec54UL, 0x157fd7faUL, 0xef8579ccUL, 0xd152de58UL,
-0xdb2ffd5eUL, 0x8f32ce19UL, 0x306af97aUL, 0x02f03ef8UL, 0x99319ad5UL, 0xc242fa0fUL, 0xa7e3ebb0UL, 0xc68e4906UL,
-0xb8da230cUL, 0x80823028UL, 0xdcdef3c8UL, 0xd35fb171UL, 0x088a1bc8UL, 0xbec0c560UL, 0x61a3c9e8UL, 0xbca8f54dUL,
-0xc72feffaUL, 0x22822e99UL, 0x82c570b4UL, 0xd8d94e89UL, 0x8b1c34bcUL, 0x301e16e6UL, 0x273be979UL, 0xb0ffeaa6UL,
-0x61d9b8c6UL, 0x00b24869UL, 0xb7ffce3fUL, 0x08dc283bUL, 0x43daf65aUL, 0xf7e19798UL, 0x7619b72fUL, 0x8f1c9ba4UL,
-0xdc8637a0UL, 0x16a7d3b1UL, 0x9fc393b7UL, 0xa7136eebUL, 0xc6bcc63eUL, 0x1a513742UL, 0xef6828bcUL, 0x520365d6UL,
-0x2d6a77abUL, 0x3527ed4bUL, 0x821fd216UL, 0x095c6e2eUL, 0xdb92f2fbUL, 0x5eea29cbUL, 0x145892f5UL, 0x91584f7fUL,
-0x5483697bUL, 0x2667a8ccUL, 0x85196048UL, 0x8c4baceaUL, 0x833860d4UL, 0x0d23e0f9UL, 0x6c387e8aUL, 0x0ae6d249UL,
-0xb284600cUL, 0xd835731dUL, 0xdcb1c647UL, 0xac4c56eaUL, 0x3ebd81b3UL, 0x230eabb0UL, 0x6438bc87UL, 0xf0b5b1faUL,
-0x8f5ea2b3UL, 0xfc184642UL, 0x0a036b7aUL, 0x4fb089bdUL, 0x649da589UL, 0xa345415eUL, 0x5c038323UL, 0x3e5d3bb9UL,
-0x43d79572UL, 0x7e6dd07cUL, 0x06dfdf1eUL, 0x6c6cc4efUL, 0x7160a539UL, 0x73bfbe70UL, 0x83877605UL, 0x4523ecf1UL};
-
-uint32_t s3[] PROGMEM = {
-0x8defc240UL, 0x25fa5d9fUL, 0xeb903dbfUL, 0xe810c907UL, 0x47607fffUL, 0x369fe44bUL, 0x8c1fc644UL, 0xaececa90UL,
-0xbeb1f9bfUL, 0xeefbcaeaUL, 0xe8cf1950UL, 0x51df07aeUL, 0x920e8806UL, 0xf0ad0548UL, 0xe13c8d83UL, 0x927010d5UL,
-0x11107d9fUL, 0x07647db9UL, 0xb2e3e4d4UL, 0x3d4f285eUL, 0xb9afa820UL, 0xfade82e0UL, 0xa067268bUL, 0x8272792eUL,
-0x553fb2c0UL, 0x489ae22bUL, 0xd4ef9794UL, 0x125e3fbcUL, 0x21fffceeUL, 0x825b1bfdUL, 0x9255c5edUL, 0x1257a240UL,
-0x4e1a8302UL, 0xbae07fffUL, 0x528246e7UL, 0x8e57140eUL, 0x3373f7bfUL, 0x8c9f8188UL, 0xa6fc4ee8UL, 0xc982b5a5UL,
-0xa8c01db7UL, 0x579fc264UL, 0x67094f31UL, 0xf2bd3f5fUL, 0x40fff7c1UL, 0x1fb78dfcUL, 0x8e6bd2c1UL, 0x437be59bUL,
-0x99b03dbfUL, 0xb5dbc64bUL, 0x638dc0e6UL, 0x55819d99UL, 0xa197c81cUL, 0x4a012d6eUL, 0xc5884a28UL, 0xccc36f71UL,
-0xb843c213UL, 0x6c0743f1UL, 0x8309893cUL, 0x0feddd5fUL, 0x2f7fe850UL, 0xd7c07f7eUL, 0x02507fbfUL, 0x5afb9a04UL,
-0xa747d2d0UL, 0x1651192eUL, 0xaf70bf3eUL, 0x58c31380UL, 0x5f98302eUL, 0x727cc3c4UL, 0x0a0fb402UL, 0x0f7fef82UL,
-0x8c96fdadUL, 0x5d2c2aaeUL, 0x8ee99a49UL, 0x50da88b8UL, 0x8427f4a0UL, 0x1eac5790UL, 0x796fb449UL, 0x8252dc15UL,
-0xefbd7d9bUL, 0xa672597dUL, 0xada840d8UL, 0x45f54504UL, 0xfa5d7403UL, 0xe83ec305UL, 0x4f91751aUL, 0x925669c2UL,
-0x23efe941UL, 0xa903f12eUL, 0x60270df2UL, 0x0276e4b6UL, 0x94fd6574UL, 0x927985b2UL, 0x8276dbcbUL, 0x02778176UL,
-0xf8af918dUL, 0x4e48f79eUL, 0x8f616ddfUL, 0xe29d840eUL, 0x842f7d83UL, 0x340ce5c8UL, 0x96bbb682UL, 0x93b4b148UL,
-0xef303cabUL, 0x984faf28UL, 0x779faf9bUL, 0x92dc560dUL, 0x224d1e20UL, 0x8437aa88UL, 0x7d29dc96UL, 0x2756d3dcUL,
-0x8b907ceeUL, 0xb51fd240UL, 0xe7c07ce3UL, 0xe566b4a1UL, 0xc3e9615eUL, 0x3cf8209dUL, 0x6094d1e3UL, 0xcd9ca341UL,
-0x5c76460eUL, 0x00ea983bUL, 0xd4d67881UL, 0xfd47572cUL, 0xf76cedd9UL, 0xbda8229cUL, 0x127dadaaUL, 0x438a074eUL,
-0x1f97c090UL, 0x081bdb8aUL, 0x93a07ebeUL, 0xb938ca15UL, 0x97b03cffUL, 0x3dc2c0f8UL, 0x8d1ab2ecUL, 0x64380e51UL,
-0x68cc7bfbUL, 0xd90f2788UL, 0x12490181UL, 0x5de5ffd4UL, 0xdd7ef86aUL, 0x76a2e214UL, 0xb9a40368UL, 0x925d958fUL,
-0x4b39fffaUL, 0xba39aee9UL, 0xa4ffd30bUL, 0xfaf7933bUL, 0x6d498623UL, 0x193cbcfaUL, 0x27627545UL, 0x825cf47aUL,
-0x61bd8ba0UL, 0xd11e42d1UL, 0xcead04f4UL, 0x127ea392UL, 0x10428db7UL, 0x8272a972UL, 0x9270c4a8UL, 0x127de50bUL,
-0x285ba1c8UL, 0x3c62f44fUL, 0x35c0eaa5UL, 0xe805d231UL, 0x428929fbUL, 0xb4fcdf82UL, 0x4fb66a53UL, 0x0e7dc15bUL,
-0x1f081fabUL, 0x108618aeUL, 0xfcfd086dUL, 0xf9ff2889UL, 0x694bcc11UL, 0x236a5caeUL, 0x12deca4dUL, 0x2c3f8cc5UL,
-0xd2d02dfeUL, 0xf8ef5896UL, 0xe4cf52daUL, 0x95155b67UL, 0x494a488cUL, 0xb9b6a80cUL, 0x5c8f82bcUL, 0x89d36b45UL,
-0x3a609437UL, 0xec00c9a9UL, 0x44715253UL, 0x0a874b49UL, 0xd773bc40UL, 0x7c34671cUL, 0x02717ef6UL, 0x4feb5536UL,
-0xa2d02fffUL, 0xd2bf60c4UL, 0xd43f03c0UL, 0x50b4ef6dUL, 0x07478cd1UL, 0x006e1888UL, 0xa2e53f55UL, 0xb9e6d4bcUL,
-0xa2048016UL, 0x97573833UL, 0xd7207d67UL, 0xde0f8f3dUL, 0x72f87b33UL, 0xabcc4f33UL, 0x7688c55dUL, 0x7b00a6b0UL,
-0x947b0001UL, 0x570075d2UL, 0xf9bb88f8UL, 0x8942019eUL, 0x4264a5ffUL, 0x856302e0UL, 0x72dbd92bUL, 0xee971b69UL,
-0x6ea22fdeUL, 0x5f08ae2bUL, 0xaf7a616dUL, 0xe5c98767UL, 0xcf1febd2UL, 0x61efc8c2UL, 0xf1ac2571UL, 0xcc8239c2UL,
-0x67214cb8UL, 0xb1e583d1UL, 0xb7dc3e62UL, 0x7f10bdceUL, 0xf90a5c38UL, 0x0ff0443dUL, 0x606e6dc6UL, 0x60543a49UL,
-0x5727c148UL, 0x2be98a1dUL, 0x8ab41738UL, 0x20e1be24UL, 0xaf96da0fUL, 0x68458425UL, 0x99833be5UL, 0x600d457dUL,
-0x282f9350UL, 0x8334b362UL, 0xd91d1120UL, 0x2b6d8da0UL, 0x642b1e31UL, 0x9c305a00UL, 0x52bce688UL, 0x1b03588aUL,
-0xf7baefd5UL, 0x4142ed9cUL, 0xa4315c11UL, 0x83323ec5UL, 0xdfef4636UL, 0xa133c501UL, 0xe9d3531cUL, 0xee353783UL};
-
-uint32_t s4[] PROGMEM = {
-0x9db30420UL, 0x1fb6e9deUL, 0xa7be7befUL, 0xd273a298UL, 0x4a4f7bdbUL, 0x64ad8c57UL, 0x85510443UL, 0xfa020ed1UL,
-0x7e287affUL, 0xe60fb663UL, 0x095f35a1UL, 0x79ebf120UL, 0xfd059d43UL, 0x6497b7b1UL, 0xf3641f63UL, 0x241e4adfUL,
-0x28147f5fUL, 0x4fa2b8cdUL, 0xc9430040UL, 0x0cc32220UL, 0xfdd30b30UL, 0xc0a5374fUL, 0x1d2d00d9UL, 0x24147b15UL,
-0xee4d111aUL, 0x0fca5167UL, 0x71ff904cUL, 0x2d195ffeUL, 0x1a05645fUL, 0x0c13fefeUL, 0x081b08caUL, 0x05170121UL,
-0x80530100UL, 0xe83e5efeUL, 0xac9af4f8UL, 0x7fe72701UL, 0xd2b8ee5fUL, 0x06df4261UL, 0xbb9e9b8aUL, 0x7293ea25UL,
-0xce84ffdfUL, 0xf5718801UL, 0x3dd64b04UL, 0xa26f263bUL, 0x7ed48400UL, 0x547eebe6UL, 0x446d4ca0UL, 0x6cf3d6f5UL,
-0x2649abdfUL, 0xaea0c7f5UL, 0x36338cc1UL, 0x503f7e93UL, 0xd3772061UL, 0x11b638e1UL, 0x72500e03UL, 0xf80eb2bbUL,
-0xabe0502eUL, 0xec8d77deUL, 0x57971e81UL, 0xe14f6746UL, 0xc9335400UL, 0x6920318fUL, 0x081dbb99UL, 0xffc304a5UL,
-0x4d351805UL, 0x7f3d5ce3UL, 0xa6c866c6UL, 0x5d5bcca9UL, 0xdaec6feaUL, 0x9f926f91UL, 0x9f46222fUL, 0x3991467dUL,
-0xa5bf6d8eUL, 0x1143c44fUL, 0x43958302UL, 0xd0214eebUL, 0x022083b8UL, 0x3fb6180cUL, 0x18f8931eUL, 0x281658e6UL,
-0x26486e3eUL, 0x8bd78a70UL, 0x7477e4c1UL, 0xb506e07cUL, 0xf32d0a25UL, 0x79098b02UL, 0xe4eabb81UL, 0x28123b23UL,
-0x69dead38UL, 0x1574ca16UL, 0xdf871b62UL, 0x211c40b7UL, 0xa51a9ef9UL, 0x0014377bUL, 0x041e8ac8UL, 0x09114003UL,
-0xbd59e4d2UL, 0xe3d156d5UL, 0x4fe876d5UL, 0x2f91a340UL, 0x557be8deUL, 0x00eae4a7UL, 0x0ce5c2ecUL, 0x4db4bba6UL,
-0xe756bdffUL, 0xdd3369acUL, 0xec17b035UL, 0x06572327UL, 0x99afc8b0UL, 0x56c8c391UL, 0x6b65811cUL, 0x5e146119UL,
-0x6e85cb75UL, 0xbe07c002UL, 0xc2325577UL, 0x893ff4ecUL, 0x5bbfc92dUL, 0xd0ec3b25UL, 0xb7801ab7UL, 0x8d6d3b24UL,
-0x20c763efUL, 0xc366a5fcUL, 0x9c382880UL, 0x0ace3205UL, 0xaac9548aUL, 0xeca1d7c7UL, 0x041afa32UL, 0x1d16625aUL,
-0x6701902cUL, 0x9b757a54UL, 0x31d477f7UL, 0x9126b031UL, 0x36cc6fdbUL, 0xc70b8b46UL, 0xd9e66a48UL, 0x56e55a79UL,
-0x026a4cebUL, 0x52437effUL, 0x2f8f76b4UL, 0x0df980a5UL, 0x8674cde3UL, 0xedda04ebUL, 0x17a9be04UL, 0x2c18f4dfUL,
-0xb7747f9dUL, 0xab2af7b4UL, 0xefc34d20UL, 0x2e096b7cUL, 0x1741a254UL, 0xe5b6a035UL, 0x213d42f6UL, 0x2c1c7c26UL,
-0x61c2f50fUL, 0x6552daf9UL, 0xd2c231f8UL, 0x25130f69UL, 0xd8167fa2UL, 0x0418f2c8UL, 0x001a96a6UL, 0x0d1526abUL,
-0x63315c21UL, 0x5e0a72ecUL, 0x49bafefdUL, 0x187908d9UL, 0x8d0dbd86UL, 0x311170a7UL, 0x3e9b640cUL, 0xcc3e10d7UL,
-0xd5cad3b6UL, 0x0caec388UL, 0xf73001e1UL, 0x6c728affUL, 0x71eae2a1UL, 0x1f9af36eUL, 0xcfcbd12fUL, 0xc1de8417UL,
-0xac07be6bUL, 0xcb44a1d8UL, 0x8b9b0f56UL, 0x013988c3UL, 0xb1c52fcaUL, 0xb4be31cdUL, 0xd8782806UL, 0x12a3a4e2UL,
-0x6f7de532UL, 0x58fd7eb6UL, 0xd01ee900UL, 0x24adffc2UL, 0xf4990fc5UL, 0x9711aac5UL, 0x001d7b95UL, 0x82e5e7d2UL,
-0x109873f6UL, 0x00613096UL, 0xc32d9521UL, 0xada121ffUL, 0x29908415UL, 0x7fbb977fUL, 0xaf9eb3dbUL, 0x29c9ed2aUL,
-0x5ce2a465UL, 0xa730f32cUL, 0xd0aa3fe8UL, 0x8a5cc091UL, 0xd49e2ce7UL, 0x0ce454a9UL, 0xd60acd86UL, 0x015f1919UL,
-0x77079103UL, 0xdea03af6UL, 0x78a8565eUL, 0xdee356dfUL, 0x21f05cbeUL, 0x8b75e387UL, 0xb3c50651UL, 0xb8a5c3efUL,
-0xd8eeb6d2UL, 0xe523be77UL, 0xc2154529UL, 0x2f69efdfUL, 0xafe67afbUL, 0xf470c4b2UL, 0xf3e0eb5bUL, 0xd6cc9876UL,
-0x39e4460cUL, 0x1fda8538UL, 0x1987832fUL, 0xca007367UL, 0xa99144f8UL, 0x296b299eUL, 0x492fc295UL, 0x9266beabUL,
-0xb5676e69UL, 0x9bd3dddaUL, 0xdf7e052fUL, 0xdb25701cUL, 0x1b5e51eeUL, 0xf65324e6UL, 0x6afce36cUL, 0x0316cc04UL,
-0x8644213eUL, 0xb7dc59d0UL, 0x7965291fUL, 0xccd6fd43UL, 0x41823979UL, 0x932bcdf6UL, 0xb657c34dUL, 0x4edfd282UL,
-0x7ae5290cUL, 0x3cb9536bUL, 0x851e20feUL, 0x9833557eUL, 0x13ecf0b0UL, 0xd3ffb372UL, 0x3f85c5c1UL, 0x0aef7ed2UL};
-
-#else
-
-uint32_t s1[] PROGMEM = {
-0xd440fb30UL, 0x0bffa09fUL, 0x2fcdec6bUL, 0x7a8c253fUL, 0x2f3f211eUL, 0xd34d009cUL, 0x40e50360UL, 0x49c99fcfUL,
-0x27afd4bfUL, 0xb5bdbb88UL, 0x904003e2UL, 0x7596d098UL, 0xe0a0636eUL, 0xd261c315UL, 0x1d66e7c2UL, 0x8effd422UL,
-0x6f3b6828UL, 0x59d07fc0UL, 0xc87923ffUL, 0xe2505f77UL, 0xd340c343UL, 0x56862fdfUL, 0x1aa47c88UL, 0x2dbdd2a2UL,
-0xd6e0c9a1UL, 0x19486c34UL, 0x876db761UL, 0x2f0f5422UL, 0xe132be2aUL, 0x6b1654aaUL, 0x3a8e5622UL, 0xd041d3a2UL,
-0xc840db66UL, 0x2f3984a7UL, 0x2fff4d00UL, 0xded2b92dUL, 0xac3f9497UL, 0xd8c1974aUL, 0xb7447652UL, 0xa737f4b5UL,
-0xefba2cb8UL, 0x59d151d7UL, 0xedf0f76fUL, 0x1f7a095aUL, 0xd0687b82UL, 0x2ef5ec90UL, 0x54c0b022UL, 0x35598ebcUL,
-0x7f2f6d4bUL, 0xa264bb50UL, 0x104966d2UL, 0x2d81e5beUL, 0x902233b7UL, 0x9f153be9UL, 0x11e48eb4UL, 0x5d34ff4bUL,
-0x40c245fdUL, 0x3f9731adUL, 0x2ed0f6c4UL, 0x6581fc55UL, 0xadcab1d5UL, 0xae2daca1UL, 0x6db7d4a2UL, 0x500c9bc1UL,
-0xf2402288UL, 0x384f6e0cUL, 0xd7bfe4a4UL, 0x72a25b4fUL, 0x2f1d4c56UL, 0x19539cc5UL, 0x54e349b9UL, 0xfe6946b0UL,
-0x8aabb6b1UL, 0xdd5813c7UL, 0x45c58563UL, 0x5d930f11UL, 0xd58a5357UL, 0x9304396aUL, 0xe0373de6UL, 0xb3f6542aUL,
-0x5f7d783aUL, 0xb5a07662UL, 0xdffca619UL, 0x6a20427aUL, 0xd5d4f929UL, 0x91181bf6UL, 0x5e2772bbUL, 0x678150aaUL,
-0x91109038UL, 0xeb05b5c6UL, 0x8ccbc784UL, 0x0f5ad72aUL, 0x27144a87UL, 0x6b93d1a2UL, 0xaf86d22aUL, 0x91d256aaUL,
-0x604389d7UL, 0x0d755c42UL, 0x269eb393UL, 0xc9847118UL, 0x2db3006cUL, 0x14bbe273UL, 0x3cbcbea0UL, 0x79376254UL,
-0xab9e4564UL, 0x828b323fUL, 0x82cf1877UL, 0xa6cea259UL, 0x2e00ee04UL, 0xe678fe89UL, 0x5009ab3fUL, 0xc2f65f32UL,
-0x053f3881UL, 0xc8c56369UL, 0xd65acb76UL, 0xc97499d4UL, 0xcf0d18caUL, 0xd5820738UL, 0xf65cfac7UL, 0x1115c38aUL,
-0x139ee735UL, 0xd091da47UL, 0x86900ff4UL, 0x9e41e2a7UL, 0x41623631UL, 0x95f41e05UL, 0x043b57aaUL, 0x8d5d804aUL,
-0xd0008354UL, 0x3c2a3200UL, 0xdfcd64bfUL, 0x8ea657baUL, 0x2b37c675UL, 0x41d3af50UL, 0x7532c1a7UL, 0xf50b5a91UL,
-0xabbf546bUL, 0x26140b2bUL, 0xd7c94cabUL, 0x82cd9c44UL, 0x65f2fbf7UL, 0xf3c585abUL, 0x94db551bUL, 0x24e3d4aaUL,
-0x3fbda4cfUL, 0xe2a3ea2dUL, 0x024d209eUL, 0xac25bdc8UL, 0xb355dfeaUL, 0x989ebdd5UL, 0xb23112e3UL, 0x6cadd52aUL,
-0xde294395UL, 0x2845beadUL, 0x690f71d8UL, 0x0fc951aaUL, 0xf66b78aaUL, 0x1e3f5122UL, 0x9ba751aaUL, 0xcc44d32aUL,
-0xf0415a7bUL, 0xadfb7cd3UL, 0x0595061bUL, 0x91e4ec41UL, 0xe632c3b4UL, 0xd4682203UL, 0xcc0a60c9UL, 0x6d7e38ceUL,
-0x6cb16bbfUL, 0x78fb706aUL, 0xc9d9030dUL, 0xde39dfd4UL, 0xda6310e0UL, 0x64f43647UL, 0xd828d35aUL, 0x96cc47b3UL,
-0xc30fbb75UL, 0xfb1b5198UL, 0x35ccfb4fUL, 0x6acf8bb5UL, 0xbc0a1fe1UL, 0x4afec5bfUL, 0x10ec0aa7UL, 0x0a5739acUL,
-0x2f44043fUL, 0x53b18861UL, 0x2e7a39e0UL, 0x79cb2757UL, 0x8f41eb9cUL, 0x8dd6ac1cUL, 0x967cd32aUL, 0x9dcb7501UL,
-0x09ff9dc6UL, 0xf0655bc7UL, 0xd840dbd9UL, 0x79770eecUL, 0xd4ea4447UL, 0x74321cb1UL, 0x9ecb24ddUL, 0xbd541c7eUL,
-0xf94411f0UL, 0xb10e24d2UL, 0xfdb37596UL, 0x5537aca3UL, 0xaf277cd4UL, 0x4d5fc851UL, 0x96759056UL, 0xe615bba5UL,
-0xf0040358UL, 0xf12c04caUL, 0xea371a01UL, 0xdbaabf8dUL, 0x4a3eba35UL, 0xa0ff2635UL, 0x094d7bc3UL, 0xd96e30bcUL,
-0x6626a598UL, 0x25f74856UL, 0x9d565effUL, 0xd063ed0cUL, 0xcfb2637cUL, 0xe1450b70UL, 0xf150ead5UL, 0x7228a985UL,
-0xa7bd1fafUL, 0x704823d4UL, 0xf30b87a7UL, 0x794d3b2dUL, 0x9841e042UL, 0xe7edd00cUL, 0xb80d4726UL, 0x4c8181f8UL,
-0xd76a4d47UL, 0x5c5e0c7cUL, 0x591923d1UL, 0x98721b38UL, 0xdbf4d2f5UL, 0x538683abUL, 0x231e2f6eUL, 0x9e9c7183UL,
-0x46e091bdUL, 0x6e45569aUL, 0x0c2039dcUL, 0x71c5c820UL, 0x1cda2b96UL, 0xff96e6e1UL, 0x08ab41b1UL, 0xb989ca7cUL,
-0x83e7691aUL, 0x4348cc02UL, 0x79c5f7a2UL, 0x7df49e42UL, 0x9c167b42UL, 0x49f0c95aUL, 0x000f8fddUL, 0xbf65815cUL};
-
-uint32_t s2[] PROGMEM = {
-0x9410201fUL, 0x5ba70befUL, 0x7ecfe369UL, 0x80433f39UL, 0x7acf61feUL, 0x7a20c5eeUL, 0x949c8855UL, 0x5106fc72UL,
-0x79efa7adUL, 0x35721d4eUL, 0xce635ad5UL, 0xba3604deUL, 0xef30c499UL, 0x94070c5fUL, 0x7ddbdc18UL, 0xf3efd6a1UL,
-0x7b2fb5a0UL, 0x0536e859UL, 0x94b015eeUL, 0x09d9ffe9UL, 0x860044dcUL, 0x594494efUL, 0xb3cc83baUL, 0xfbcdc3e0UL,
-0x8141dad1UL, 0xb12a093bUL, 0xc1f197f9UL, 0x7bcfe6a5UL, 0xdb0d4201UL, 0x5befe7e4UL, 0x41ffa125UL, 0x06f880e1UL,
-0x8010c41fUL, 0x7aee9b17UL, 0xa9c67ad3UL, 0xa43058feUL, 0x7f8bde98UL, 0x4e3fe877UL, 0x69929279UL, 0x7b9ffa24UL,
-0x5bc813e1UL, 0x8300c4acUL, 0x253550d7UL, 0x5f61eaf7UL, 0x54311462UL, 0x634b550dUL, 0x2111685dUL, 0x59c366c8UL,
-0x73cf633dUL, 0xc034e2ceUL, 0x877ed8d4UL, 0x212b675cUL, 0x81611f07UL, 0x7f62f739UL, 0x84301e36UL, 0x3b57ebe4UL,
-0xa4642f60UL, 0x9ccd3ad6UL, 0x3546bc1bUL, 0x2d03819eUL, 0x0cf50127UL, 0xb47a8499UL, 0x79dfe3a0UL, 0x8cf36cbaUL,
-0x94308410UL, 0x5ea93725UL, 0xfe6f6ff4UL, 0x1f3bffa1UL, 0x6afb8c20UL, 0x748c458fUL, 0x27a2e0d9UL, 0x343ac74eUL,
-0x694f88fcUL, 0xdfe84d3eUL, 0x88000eefUL, 0x8d645935UL, 0x8c38458aUL, 0x6643801dUL, 0xfd9b1d72UL, 0xbb8486a5UL,
-0x336325e8UL, 0x12824e84UL, 0x98808d12UL, 0xb43fd3feUL, 0xe10a28ceUL, 0xa59be127UL, 0x52c2a6d5UL, 0xbd5497e4UL,
-0xdd55d6c5UL, 0x647066ebUL, 0x4d0b8477UL, 0x01a8b6a1UL, 0xa926db84UL, 0x1467b5e0UL, 0xb743f021UL, 0x6058d0e5UL,
-0x8430f054UL, 0x72f46f06UL, 0x53a11aa3UL, 0x5547dcdaUL, 0xbf5d62b5UL, 0xe61b5668UL, 0x946bca83UL, 0x3bd26e2dUL,
-0xdb01cfecUL, 0xbad0d3a6UL, 0x5c3d80b6UL, 0x09a777afUL, 0x4ca3b433UL, 0xd6c87b39UL, 0x952be25eUL, 0x04530e5fUL,
-0x616fed81UL, 0x6443e720UL, 0x78135eb4UL, 0x9b6318deUL, 0x22a11c88UL, 0xd12667b9UL, 0xe8a74980UL, 0x7bdab722UL,
-0x252d555eUL, 0x37d27252UL, 0x1c95d279UL, 0x4c890dc6UL, 0x02b48c48UL, 0x5bfea41bUL, 0x6b9fb0a4UL, 0xcf15a81cUL,
-0x05300ca2UL, 0x63df7188UL, 0xcb2fdeb9UL, 0xe9c9c60cUL, 0x53ffee0bUL, 0x174521e3UL, 0x352854b4UL, 0x3c29639fUL,
-0x29e741eeUL, 0x7c2d1d6eUL, 0x86520450UL, 0xf385661eUL, 0xc60134f3UL, 0x952ca230UL, 0x5008a731UL, 0x130f9360UL,
-0x1784f973UL, 0x599826a1UL, 0x445c64ecUL, 0xa977c852UL, 0xa633ffcdUL, 0x41172ba0UL, 0xa2d9ba7cUL, 0x6f038021UL,
-0x089cd950UL, 0x61483fcbUL, 0x65d76bc2UL, 0xabf6a364UL, 0x76263480UL, 0x7b5ea725UL, 0xfcd1e6e4UL, 0xe610c720UL,
-0x80b6f0cdUL, 0x3b4d8417UL, 0x4df8ee31UL, 0xe424087eUL, 0xeb49cb2cUL, 0xae3b6a84UL, 0x8878f78fUL, 0xf6605deeUL,
-0x7356f77aUL, 0xdb5cdd2fUL, 0xc13116a1UL, 0x436ff630UL, 0x54ecfab3UL, 0xfad77f15UL, 0xcc7985efUL, 0x58de52d1UL,
-0x5efd2fdbUL, 0x19ce328fUL, 0x7af96a30UL, 0xf83ef002UL, 0xd59a3199UL, 0x0ffa42c2UL, 0xb0ebe3a7UL, 0x06498ec6UL,
-0x0c23dab8UL, 0x28308280UL, 0xc8f3dedcUL, 0x71b15fd3UL, 0xc81b8a08UL, 0x60c5c0beUL, 0xe8c9a361UL, 0x4df5a8bcUL,
-0xfaef2fc7UL, 0x992e8222UL, 0xb470c582UL, 0x894ed9d8UL, 0xbc341c8bUL, 0xe6161e30UL, 0x79e93b27UL, 0xa6eaffb0UL,
-0xc6b8d961UL, 0x6948b200UL, 0x3fceffb7UL, 0x3b28dc08UL, 0x5af6da43UL, 0x9897e1f7UL, 0x2fb71976UL, 0xa49b1c8fUL,
-0xa03786dcUL, 0xb1d3a716UL, 0xb793c39fUL, 0xeb6e13a7UL, 0x3ec6bcc6UL, 0x4237511aUL, 0xbc2868efUL, 0xd6650352UL,
-0xab776a2dUL, 0x4bed2735UL, 0x16d21f82UL, 0x2e6e5c09UL, 0xfbf292dbUL, 0xcb29ea5eUL, 0xf5925814UL, 0x7f4f5891UL,
-0x7b698354UL, 0xcca86726UL, 0x48601985UL, 0xeaac4b8cUL, 0xd4603883UL, 0xf9e0230dUL, 0x8a7e386cUL, 0x49d2e60aUL,
-0x0c6084b2UL, 0x1d7335d8UL, 0x47c6b1dcUL, 0xea564cacUL, 0xb381bd3eUL, 0xb0ab0e23UL, 0x87bc3864UL, 0xfab1b5f0UL,
-0xb3a25e8fUL, 0x424618fcUL, 0x7a6b030aUL, 0xbd89b04fUL, 0x89a59d64UL, 0x5e4145a3UL, 0x2383035cUL, 0xb93b5d3eUL,
-0x7295d743UL, 0x7cd06d7eUL, 0x1edfdf06UL, 0xefc46c6cUL, 0x39a56071UL, 0x70bebf73UL, 0x05768783UL, 0xf1ec2345UL};
-
-uint32_t s3[] PROGMEM = {
-0x40c2ef8dUL, 0x9f5dfa25UL, 0xbf3d90ebUL, 0x07c910e8UL, 0xff7f6047UL, 0x4be49f36UL, 0x44c61f8cUL, 0x90caceaeUL,
-0xbff9b1beUL, 0xeacafbeeUL, 0x5019cfe8UL, 0xae07df51UL, 0x06880e92UL, 0x4805adf0UL, 0x838d3ce1UL, 0xd5107092UL,
-0x9f7d1011UL, 0xb97d6407UL, 0xd4e4e3b2UL, 0x5e284f3dUL, 0x20a8afb9UL, 0xe082defaUL, 0x8b2667a0UL, 0x2e797282UL,
-0xc0b23f55UL, 0x2be29a48UL, 0x9497efd4UL, 0xbc3f5e12UL, 0xeefcff21UL, 0xfd1b5b82UL, 0xedc55592UL, 0x40a25712UL,
-0x02831a4eUL, 0xff7fe0baUL, 0xe7468252UL, 0x0e14578eUL, 0xbff77333UL, 0x88819f8cUL, 0xe84efca6UL, 0xa5b582c9UL,
-0xb71dc0a8UL, 0x64c29f57UL, 0x314f0967UL, 0x5f3fbdf2UL, 0xc1f7ff40UL, 0xfc8db71fUL, 0xc1d26b8eUL, 0x9be57b43UL,
-0xbf3db099UL, 0x4bc6dbb5UL, 0xe6c08d63UL, 0x999d8155UL, 0x1cc897a1UL, 0x6e2d014aUL, 0x284a88c5UL, 0x716fc3ccUL,
-0x13c243b8UL, 0xf143076cUL, 0x3c890983UL, 0x5fdded0fUL, 0x50e87f2fUL, 0x7e7fc0d7UL, 0xbf7f5002UL, 0x049afb5aUL,
-0xd0d247a7UL, 0x2e195116UL, 0x3ebf70afUL, 0x8013c358UL, 0x2e30985fUL, 0xc4c37c72UL, 0x02b40f0aUL, 0x82ef7f0fUL,
-0xadfd968cUL, 0xae2a2c5dUL, 0x499ae98eUL, 0xb888da50UL, 0xa0f42784UL, 0x9057ac1eUL, 0x49b46f79UL, 0x15dc5282UL,
-0x9b7dbdefUL, 0x7d5972a6UL, 0xd840a8adUL, 0x0445f545UL, 0x03745dfaUL, 0x05c33ee8UL, 0x1a75914fUL, 0xc2695692UL,
-0x41e9ef23UL, 0x2ef103a9UL, 0xf20d2760UL, 0xb6e47602UL, 0x7465fd94UL, 0xb2857992UL, 0xcbdb7682UL, 0x76817702UL,
-0x8d91aff8UL, 0x9ef7484eUL, 0xdf6d618fUL, 0x0e849de2UL, 0x837d2f84UL, 0xc8e50c34UL, 0x82b6bb96UL, 0x48b1b493UL,
-0xab3c30efUL, 0x28af4f98UL, 0x9baf9f77UL, 0x0d56dc92UL, 0x201e4d22UL, 0x88aa3784UL, 0x96dc297dUL, 0xdcd35627UL,
-0xee7c908bUL, 0x40d21fb5UL, 0xe37cc0e7UL, 0xa1b466e5UL, 0x5e61e9c3UL, 0x9d20f83cUL, 0xe3d19460UL, 0x41a39ccdUL,
-0x0e46765cUL, 0x3b98ea00UL, 0x8178d6d4UL, 0x2c5747fdUL, 0xd9ed6cf7UL, 0x9c22a8bdUL, 0xaaad7d12UL, 0x4e078a43UL,
-0x90c0971fUL, 0x8adb1b08UL, 0xbe7ea093UL, 0x15ca38b9UL, 0xff3cb097UL, 0xf8c0c23dUL, 0xecb21a8dUL, 0x510e3864UL,
-0xfb7bcc68UL, 0x88270fd9UL, 0x81014912UL, 0xd4ffe55dUL, 0x6af87eddUL, 0x14e2a276UL, 0x6803a4b9UL, 0x8f955d92UL,
-0xfaff394bUL, 0xe9ae39baUL, 0x0bd3ffa4UL, 0x3b93f7faUL, 0x2386496dUL, 0xfabc3c19UL, 0x45756227UL, 0x7af45c82UL,
-0xa08bbd61UL, 0xd1421ed1UL, 0xf404adceUL, 0x92a37e12UL, 0xb78d4210UL, 0x72a97282UL, 0xa8c47092UL, 0x0be57d12UL,
-0xc8a15b28UL, 0x4ff4623cUL, 0xa5eac035UL, 0x31d205e8UL, 0xfb298942UL, 0x82dffcb4UL, 0x536ab64fUL, 0x5bc17d0eUL,
-0xab1f081fUL, 0xae188610UL, 0x6d08fdfcUL, 0x8928fff9UL, 0x11cc4b69UL, 0xae5c6a23UL, 0x4dcade12UL, 0xc58c3f2cUL,
-0xfe2dd0d2UL, 0x9658eff8UL, 0xda52cfe4UL, 0x675b1595UL, 0x8c484a49UL, 0x0ca8b6b9UL, 0xbc828f5cUL, 0x456bd389UL,
-0x3794603aUL, 0xa9c900ecUL, 0x53527144UL, 0x494b870aUL, 0x40bc73d7UL, 0x1c67347cUL, 0xf67e7102UL, 0x3655eb4fUL,
-0xff2fd0a2UL, 0xc460bfd2UL, 0xc0033fd4UL, 0x6defb450UL, 0xd18c4707UL, 0x88186e00UL, 0x553fe5a2UL, 0xbcd4e6b9UL,
-0x168004a2UL, 0x33385797UL, 0x677d20d7UL, 0x3d8f0fdeUL, 0x337bf872UL, 0x334fccabUL, 0x5dc58876UL, 0xb0a6007bUL,
-0x01007b94UL, 0xd2750057UL, 0xf888bbf9UL, 0x9e014289UL, 0xffa56442UL, 0xe0026385UL, 0x2bd9db72UL, 0x691b97eeUL,
-0xde2fa26eUL, 0x2bae085fUL, 0x6d617aafUL, 0x6787c9e5UL, 0xd2eb1fcfUL, 0xc2c8ef61UL, 0x7125acf1UL, 0xc23982ccUL,
-0xb84c2167UL, 0xd183e5b1UL, 0x623edcb7UL, 0xcebd107fUL, 0x385c0af9UL, 0x3d44f00fUL, 0xc66d6e60UL, 0x493a5460UL,
-0x48c12757UL, 0x1d8ae92bUL, 0x3817b48aUL, 0x24bee120UL, 0x0fda96afUL, 0x25844568UL, 0xe53b8399UL, 0x7d450d60UL,
-0x50932f28UL, 0x62b33483UL, 0x20111dd9UL, 0xa08d6d2bUL, 0x311e2b64UL, 0x005a309cUL, 0x88e6bc52UL, 0x8a58031bUL,
-0xd5efbaf7UL, 0x9ced4241UL, 0x115c31a4UL, 0xc53e3283UL, 0x3646efdfUL, 0x01c533a1UL, 0x1c53d3e9UL, 0x833735eeUL};
-
-uint32_t s4[] PROGMEM = {
-0x2004b39dUL, 0xdee9b61fUL, 0xef7bbea7UL, 0x98a273d2UL, 0xdb7b4f4aUL, 0x578cad64UL, 0x43045185UL, 0xd10e02faUL,
-0xff7a287eUL, 0x63b60fe6UL, 0xa1355f09UL, 0x20f1eb79UL, 0x439d05fdUL, 0xb1b79764UL, 0x631f64f3UL, 0xdf4a1e24UL,
-0x5f7f1428UL, 0xcdb8a24fUL, 0x400043c9UL, 0x2022c30cUL, 0x300bd3fdUL, 0x4f37a5c0UL, 0xd9002d1dUL, 0x157b1424UL,
-0x1a114deeUL, 0x6751ca0fUL, 0x4c90ff71UL, 0xfe5f192dUL, 0x5f64051aUL, 0xfefe130cUL, 0xca081b08UL, 0x21011705UL,
-0x00015380UL, 0xfe5e3ee8UL, 0xf8f49aacUL, 0x0127e77fUL, 0x5feeb8d2UL, 0x6142df06UL, 0x8a9b9ebbUL, 0x25ea9372UL,
-0xdfff84ceUL, 0x018871f5UL, 0x044bd63dUL, 0x3b266fa2UL, 0x0084d47eUL, 0xe6eb7e54UL, 0xa04c6d44UL, 0xf5d6f36cUL,
-0xdfab4926UL, 0xf5c7a0aeUL, 0xc18c3336UL, 0x937e3f50UL, 0x612077d3UL, 0xe138b611UL, 0x030e5072UL, 0xbbb20ef8UL,
-0x2e50e0abUL, 0xde778decUL, 0x811e9757UL, 0x46674fe1UL, 0x005433c9UL, 0x8f312069UL, 0x99bb1d08UL, 0xa504c3ffUL,
-0x0518354dUL, 0xe35c3d7fUL, 0xc666c8a6UL, 0xa9cc5b5dUL, 0xea6fecdaUL, 0x916f929fUL, 0x2f22469fUL, 0x7d469139UL,
-0x8e6dbfa5UL, 0x4fc44311UL, 0x02839543UL, 0xeb4e21d0UL, 0xb8832002UL, 0x0c18b63fUL, 0x1e93f818UL, 0xe6581628UL,
-0x3e6e4826UL, 0x708ad78bUL, 0xc1e47774UL, 0x7ce006b5UL, 0x250a2df3UL, 0x028b0979UL, 0x81bbeae4UL, 0x233b1228UL,
-0x38adde69UL, 0x16ca7415UL, 0x621b87dfUL, 0xb7401c21UL, 0xf99e1aa5UL, 0x7b371400UL, 0xc88a1e04UL, 0x03401109UL,
-0xd2e459bdUL, 0xd556d1e3UL, 0xd576e84fUL, 0x40a3912fUL, 0xdee87b55UL, 0xa7e4ea00UL, 0xecc2e50cUL, 0xa6bbb44dUL,
-0xffbd56e7UL, 0xac6933ddUL, 0x35b017ecUL, 0x27235706UL, 0xb0c8af99UL, 0x91c3c856UL, 0x1c81656bUL, 0x1961145eUL,
-0x75cb856eUL, 0x02c007beUL, 0x775532c2UL, 0xecf43f89UL, 0x2dc9bf5bUL, 0x253becd0UL, 0xb71a80b7UL, 0x243b6d8dUL,
-0xef63c720UL, 0xfca566c3UL, 0x8028389cUL, 0x0532ce0aUL, 0x8a54c9aaUL, 0xc7d7a1ecUL, 0x32fa1a04UL, 0x5a62161dUL,
-0x2c900167UL, 0x547a759bUL, 0xf777d431UL, 0x31b02691UL, 0xdb6fcc36UL, 0x468b0bc7UL, 0x486ae6d9UL, 0x795ae556UL,
-0xeb4c6a02UL, 0xff7e4352UL, 0xb4768f2fUL, 0xa580f90dUL, 0xe3cd7486UL, 0xeb04daedUL, 0x04bea917UL, 0xdff4182cUL,
-0x9d7f74b7UL, 0xb4f72aabUL, 0x204dc3efUL, 0x7c6b092eUL, 0x54a24117UL, 0x35a0b6e5UL, 0xf6423d21UL, 0x267c1c2cUL,
-0x0ff5c261UL, 0xf9da5265UL, 0xf831c2d2UL, 0x690f1325UL, 0xa27f16d8UL, 0xc8f21804UL, 0xa6961a00UL, 0xab26150dUL,
-0x215c3163UL, 0xec720a5eUL, 0xfdfeba49UL, 0xd9087918UL, 0x86bd0d8dUL, 0xa7701131UL, 0x0c649b3eUL, 0xd7103eccUL,
-0xb6d3cad5UL, 0x88c3ae0cUL, 0xe10130f7UL, 0xff8a726cUL, 0xa1e2ea71UL, 0x6ef39a1fUL, 0x2fd1cbcfUL, 0x1784dec1UL,
-0x6bbe07acUL, 0xd8a144cbUL, 0x560f9b8bUL, 0xc3883901UL, 0xca2fc5b1UL, 0xcd31beb4UL, 0x062878d8UL, 0xe2a4a312UL,
-0x32e57d6fUL, 0xb67efd58UL, 0x00e91ed0UL, 0xc2ffad24UL, 0xc50f99f4UL, 0xc5aa1197UL, 0x957b1d00UL, 0xd2e7e582UL,
-0xf6739810UL, 0x96306100UL, 0x21952dc3UL, 0xff21a1adUL, 0x15849029UL, 0x7f97bb7fUL, 0xdbb39eafUL, 0x2aedc929UL,
-0x65a4e25cUL, 0x2cf330a7UL, 0xe83faad0UL, 0x91c05c8aUL, 0xe72c9ed4UL, 0xa954e40cUL, 0x86cd0ad6UL, 0x19195f01UL,
-0x03910777UL, 0xf63aa0deUL, 0x5e56a878UL, 0xdf56e3deUL, 0xbe5cf021UL, 0x87e3758bUL, 0x5106c5b3UL, 0xefc3a5b8UL,
-0xd2b6eed8UL, 0x77be23e5UL, 0x294515c2UL, 0xdfef692fUL, 0xfb7ae6afUL, 0xb2c470f4UL, 0x5bebe0f3UL, 0x7698ccd6UL,
-0x0c46e439UL, 0x3885da1fUL, 0x2f838719UL, 0x677300caUL, 0xf84491a9UL, 0x9e296b29UL, 0x95c22f49UL, 0xabbe6692UL,
-0x696e67b5UL, 0xdaddd39bUL, 0x2f057edfUL, 0x1c7025dbUL, 0xee515e1bUL, 0xe62453f6UL, 0x6ce3fc6aUL, 0x04cc1603UL,
-0x3e214486UL, 0xd059dcb7UL, 0x1f296579UL, 0x43fdd6ccUL, 0x79398241UL, 0xf6cd2b93UL, 0x4dc357b6UL, 0x82d2df4eUL,
-0x0c29e57aUL, 0x6b53b93cUL, 0xfe201e85UL, 0x7e553398UL, 0xb0f0ec13UL, 0x72b3ffd3UL, 0xc1c5853fUL, 0xd27eef0aUL};
-
-#endif
-
-/*********************************************************************************************************/
-
-#ifdef BIG_ENDIAN
-
-uint32_t s5[] PROGMEM = {
-0x7ec90c04UL, 0x2c6e74b9UL, 0x9b0e66dfUL, 0xa6337911UL, 0xb86a7fffUL, 0x1dd358f5UL, 0x44dd9d44UL, 0x1731167fUL,
-0x08fbf1faUL, 0xe7f511ccUL, 0xd2051b00UL, 0x735aba00UL, 0x2ab722d8UL, 0x386381cbUL, 0xacf6243aUL, 0x69befd7aUL,
-0xe6a2e77fUL, 0xf0c720cdUL, 0xc4494816UL, 0xccf5c180UL, 0x38851640UL, 0x15b0a848UL, 0xe68b18cbUL, 0x4caadeffUL,
-0x5f480a01UL, 0x0412b2aaUL, 0x259814fcUL, 0x41d0efe2UL, 0x4e40b48dUL, 0x248eb6fbUL, 0x8dba1cfeUL, 0x41a99b02UL,
-0x1a550a04UL, 0xba8f65cbUL, 0x7251f4e7UL, 0x95a51725UL, 0xc106ecd7UL, 0x97a5980aUL, 0xc539b9aaUL, 0x4d79fe6aUL,
-0xf2f3f763UL, 0x68af8040UL, 0xed0c9e56UL, 0x11b4958bUL, 0xe1eb5a88UL, 0x8709e6b0UL, 0xd7e07156UL, 0x4e29fea7UL,
-0x6366e52dUL, 0x02d1c000UL, 0xc4ac8e05UL, 0x9377f571UL, 0x0c05372aUL, 0x578535f2UL, 0x2261be02UL, 0xd642a0c9UL,
-0xdf13a280UL, 0x74b55bd2UL, 0x682199c0UL, 0xd421e5ecUL, 0x53fb3ce8UL, 0xc8adedb3UL, 0x28a87fc9UL, 0x3d959981UL,
-0x5c1ff900UL, 0xfe38d399UL, 0x0c4eff0bUL, 0x062407eaUL, 0xaa2f4fb1UL, 0x4fb96976UL, 0x90c79505UL, 0xb0a8a774UL,
-0xef55a1ffUL, 0xe59ca2c2UL, 0xa6b62d27UL, 0xe66a4263UL, 0xdf65001fUL, 0x0ec50966UL, 0xdfdd55bcUL, 0x29de0655UL,
-0x911e739aUL, 0x17af8975UL, 0x32c7911cUL, 0x89f89468UL, 0x0d01e980UL, 0x524755f4UL, 0x03b63cc9UL, 0x0cc844b2UL,
-0xbcf3f0aaUL, 0x87ac36e9UL, 0xe53a7426UL, 0x01b3d82bUL, 0x1a9e7449UL, 0x64ee2d7eUL, 0xcddbb1daUL, 0x01c94910UL,
-0xb868bf80UL, 0x0d26f3fdUL, 0x9342ede7UL, 0x04a5c284UL, 0x636737b6UL, 0x50f5b616UL, 0xf24766e3UL, 0x8eca36c1UL,
-0x136e05dbUL, 0xfef18391UL, 0xfb887a37UL, 0xd6e7f7d4UL, 0xc7fb7dc9UL, 0x3063fcdfUL, 0xb6f589deUL, 0xec2941daUL,
-0x26e46695UL, 0xb7566419UL, 0xf654efc5UL, 0xd08d58b7UL, 0x48925401UL, 0xc1bacb7fUL, 0xe5ff550fUL, 0xb6083049UL,
-0x5bb5d0e8UL, 0x87d72e5aUL, 0xab6a6ee1UL, 0x223a66ceUL, 0xc62bf3cdUL, 0x9e0885f9UL, 0x68cb3e47UL, 0x086c010fUL,
-0xa21de820UL, 0xd18b69deUL, 0xf3f65777UL, 0xfa02c3f6UL, 0x407edac3UL, 0xcbb3d550UL, 0x1793084dUL, 0xb0d70ebaUL,
-0x0ab378d5UL, 0xd951fb0cUL, 0xded7da56UL, 0x4124bbe4UL, 0x94ca0b56UL, 0x0f5755d1UL, 0xe0e1e56eUL, 0x6184b5beUL,
-0x580a249fUL, 0x94f74bc0UL, 0xe327888eUL, 0x9f7b5561UL, 0xc3dc0280UL, 0x05687715UL, 0x646c6bd7UL, 0x44904db3UL,
-0x66b4f0a3UL, 0xc0f1648aUL, 0x697ed5afUL, 0x49e92ff6UL, 0x309e374fUL, 0x2cb6356aUL, 0x85808573UL, 0x4991f840UL,
-0x76f0ae02UL, 0x083be84dUL, 0x28421c9aUL, 0x44489406UL, 0x736e4cb8UL, 0xc1092910UL, 0x8bc95fc6UL, 0x7d869cf4UL,
-0x134f616fUL, 0x2e77118dUL, 0xb31b2be1UL, 0xaa90b472UL, 0x3ca5d717UL, 0x7d161bbaUL, 0x9cad9010UL, 0xaf462ba2UL,
-0x9fe459d2UL, 0x45d34559UL, 0xd9f2da13UL, 0xdbc65487UL, 0xf3e4f94eUL, 0x176d486fUL, 0x097c13eaUL, 0x631da5c7UL,
-0x445f7382UL, 0x175683f4UL, 0xcdc66a97UL, 0x70be0288UL, 0xb3cdcf72UL, 0x6e5dd2f3UL, 0x20936079UL, 0x459b80a5UL,
-0xbe60e2dbUL, 0xa9c23101UL, 0xeba5315cUL, 0x224e42f2UL, 0x1c5c1572UL, 0xf6721b2cUL, 0x1ad2fff3UL, 0x8c25404eUL,
-0x324ed72fUL, 0x4067b7fdUL, 0x0523138eUL, 0x5ca3bc78UL, 0xdc0fd66eUL, 0x75922283UL, 0x784d6b17UL, 0x58ebb16eUL,
-0x44094f85UL, 0x3f481d87UL, 0xfcfeae7bUL, 0x77b5ff76UL, 0x8c2302bfUL, 0xaaf47556UL, 0x5f46b02aUL, 0x2b092801UL,
-0x3d38f5f7UL, 0x0ca81f36UL, 0x52af4a8aUL, 0x66d5e7c0UL, 0xdf3b0874UL, 0x95055110UL, 0x1b5ad7a8UL, 0xf61ed5adUL,
-0x6cf6e479UL, 0x20758184UL, 0xd0cefa65UL, 0x88f7be58UL, 0x4a046826UL, 0x0ff6f8f3UL, 0xa09c7f70UL, 0x5346aba0UL,
-0x5ce96c28UL, 0xe176eda3UL, 0x6bac307fUL, 0x376829d2UL, 0x85360fa9UL, 0x17e3fe2aUL, 0x24b79767UL, 0xf5a96b20UL,
-0xd6cd2595UL, 0x68ff1ebfUL, 0x7555442cUL, 0xf19f06beUL, 0xf9e0659aUL, 0xeeb9491dUL, 0x34010718UL, 0xbb30cab8UL,
-0xe822fe15UL, 0x88570983UL, 0x750e6249UL, 0xda627e55UL, 0x5e76ffa8UL, 0xb1534546UL, 0x6d47de08UL, 0xefe9e7d4UL};
-
-
-uint32_t s6[] PROGMEM = {
-0xf6fa8f9dUL, 0x2cac6ce1UL, 0x4ca34867UL, 0xe2337f7cUL, 0x95db08e7UL, 0x016843b4UL, 0xeced5cbcUL, 0x325553acUL,
-0xbf9f0960UL, 0xdfa1e2edUL, 0x83f0579dUL, 0x63ed86b9UL, 0x1ab6a6b8UL, 0xde5ebe39UL, 0xf38ff732UL, 0x8989b138UL,
-0x33f14961UL, 0xc01937bdUL, 0xf506c6daUL, 0xe4625e7eUL, 0xa308ea99UL, 0x4e23e33cUL, 0x79cbd7ccUL, 0x48a14367UL,
-0xa3149619UL, 0xfec94bd5UL, 0xa114174aUL, 0xeaa01866UL, 0xa084db2dUL, 0x09a8486fUL, 0xa888614aUL, 0x2900af98UL,
-0x01665991UL, 0xe1992863UL, 0xc8f30c60UL, 0x2e78ef3cUL, 0xd0d51932UL, 0xcf0fec14UL, 0xf7ca07d2UL, 0xd0a82072UL,
-0xfd41197eUL, 0x9305a6b0UL, 0xe86be3daUL, 0x74bed3cdUL, 0x372da53cUL, 0x4c7f4448UL, 0xdab5d440UL, 0x6dba0ec3UL,
-0x083919a7UL, 0x9fbaeed9UL, 0x49dbcfb0UL, 0x4e670c53UL, 0x5c3d9c01UL, 0x64bdb941UL, 0x2c0e636aUL, 0xba7dd9cdUL,
-0xea6f7388UL, 0xe70bc762UL, 0x35f29adbUL, 0x5c4cdd8dUL, 0xf0d48d8cUL, 0xb88153e2UL, 0x08a19866UL, 0x1ae2eac8UL,
-0x284caf89UL, 0xaa928223UL, 0x9334be53UL, 0x3b3a21bfUL, 0x16434be3UL, 0x9aea3906UL, 0xefe8c36eUL, 0xf890cdd9UL,
-0x80226daeUL, 0xc340a4a3UL, 0xdf7e9c09UL, 0xa694a807UL, 0x5b7c5eccUL, 0x221db3a6UL, 0x9a69a02fUL, 0x68818a54UL,
-0xceb2296fUL, 0x53c0843aUL, 0xfe893655UL, 0x25bfe68aUL, 0xb4628abcUL, 0xcf222ebfUL, 0x25ac6f48UL, 0xa9a99387UL,
-0x53bddb65UL, 0xe76ffbe7UL, 0xe967fd78UL, 0x0ba93563UL, 0x8e342bc1UL, 0xe8a11be9UL, 0x4980740dUL, 0xc8087dfcUL,
-0x8de4bf99UL, 0xa11101a0UL, 0x7fd37975UL, 0xda5a26c0UL, 0xe81f994fUL, 0x9528cd89UL, 0xfd339fedUL, 0xb87834bfUL,
-0x5f04456dUL, 0x22258698UL, 0xc9c4c83bUL, 0x2dc156beUL, 0x4f628daaUL, 0x57f55ec5UL, 0xe2220abeUL, 0xd2916ebfUL,
-0x4ec75b95UL, 0x24f2c3c0UL, 0x42d15d99UL, 0xcd0d7fa0UL, 0x7b6e27ffUL, 0xa8dc8af0UL, 0x7345c106UL, 0xf41e232fUL,
-0x35162386UL, 0xe6ea8926UL, 0x3333b094UL, 0x157ec6f2UL, 0x372b74afUL, 0x692573e4UL, 0xe9a9d848UL, 0xf3160289UL,
-0x3a62ef1dUL, 0xa787e238UL, 0xf3a5f676UL, 0x74364853UL, 0x20951063UL, 0x4576698dUL, 0xb6fad407UL, 0x592af950UL,
-0x36f73523UL, 0x4cfb6e87UL, 0x7da4cec0UL, 0x6c152daaUL, 0xcb0396a8UL, 0xc50dfe5dUL, 0xfcd707abUL, 0x0921c42fUL,
-0x89dff0bbUL, 0x5fe2be78UL, 0x448f4f33UL, 0x754613c9UL, 0x2b05d08dUL, 0x48b9d585UL, 0xdc049441UL, 0xc8098f9bUL,
-0x7dede786UL, 0xc39a3373UL, 0x42410005UL, 0x6a091751UL, 0x0ef3c8a6UL, 0x890072d6UL, 0x28207682UL, 0xa9a9f7beUL,
-0xbf32679dUL, 0xd45b5b75UL, 0xb353fd00UL, 0xcbb0e358UL, 0x830f220aUL, 0x1f8fb214UL, 0xd372cf08UL, 0xcc3c4a13UL,
-0x8cf63166UL, 0x061c87beUL, 0x88c98f88UL, 0x6062e397UL, 0x47cf8e7aUL, 0xb6c85283UL, 0x3cc2acfbUL, 0x3fc06976UL,
-0x4e8f0252UL, 0x64d8314dUL, 0xda3870e3UL, 0x1e665459UL, 0xc10908f0UL, 0x513021a5UL, 0x6c5b68b7UL, 0x822f8aa0UL,
-0x3007cd3eUL, 0x74719eefUL, 0xdc872681UL, 0x073340d4UL, 0x7e432fd9UL, 0x0c5ec241UL, 0x8809286cUL, 0xf592d891UL,
-0x08a930f6UL, 0x957ef305UL, 0xb7fbffbdUL, 0xc266e96fUL, 0x6fe4ac98UL, 0xb173ecc0UL, 0xbc60b42aUL, 0x953498daUL,
-0xfba1ae12UL, 0x2d4bd736UL, 0x0f25faabUL, 0xa4f3fcebUL, 0xe2969123UL, 0x257f0c3dUL, 0x9348af49UL, 0x361400bcUL,
-0xe8816f4aUL, 0x3814f200UL, 0xa3f94043UL, 0x9c7a54c2UL, 0xbc704f57UL, 0xda41e7f9UL, 0xc25ad33aUL, 0x54f4a084UL,
-0xb17f5505UL, 0x59357cbeUL, 0xedbd15c8UL, 0x7f97c5abUL, 0xba5ac7b5UL, 0xb6f6deafUL, 0x3a479c3aUL, 0x5302da25UL,
-0x653d7e6aUL, 0x54268d49UL, 0x51a477eaUL, 0x5017d55bUL, 0xd7d25d88UL, 0x44136c76UL, 0x0404a8c8UL, 0xb8e5a121UL,
-0xb81a928aUL, 0x60ed5869UL, 0x97c55b96UL, 0xeaec991bUL, 0x29935913UL, 0x01fdb7f1UL, 0x088e8dfaUL, 0x9ab6f6f5UL,
-0x3b4cbf9fUL, 0x4a5de3abUL, 0xe6051d35UL, 0xa0e1d855UL, 0xd36b4cf1UL, 0xf544edebUL, 0xb0e93524UL, 0xbebb8fbdUL,
-0xa2d762cfUL, 0x49c92f54UL, 0x38b5f331UL, 0x7128a454UL, 0x48392905UL, 0xa65b1db8UL, 0x851c97bdUL, 0xd675cf2fUL};
-
-
-uint32_t s7[] PROGMEM = {
-0x85e04019UL, 0x332bf567UL, 0x662dbfffUL, 0xcfc65693UL, 0x2a8d7f6fUL, 0xab9bc912UL, 0xde6008a1UL, 0x2028da1fUL,
-0x0227bce7UL, 0x4d642916UL, 0x18fac300UL, 0x50f18b82UL, 0x2cb2cb11UL, 0xb232e75cUL, 0x4b3695f2UL, 0xb28707deUL,
-0xa05fbcf6UL, 0xcd4181e9UL, 0xe150210cUL, 0xe24ef1bdUL, 0xb168c381UL, 0xfde4e789UL, 0x5c79b0d8UL, 0x1e8bfd43UL,
-0x4d495001UL, 0x38be4341UL, 0x913cee1dUL, 0x92a79c3fUL, 0x089766beUL, 0xbaeeadf4UL, 0x1286becfUL, 0xb6eacb19UL,
-0x2660c200UL, 0x7565bde4UL, 0x64241f7aUL, 0x8248dca9UL, 0xc3b3ad66UL, 0x28136086UL, 0x0bd8dfa8UL, 0x356d1cf2UL,
-0x107789beUL, 0xb3b2e9ceUL, 0x0502aa8fUL, 0x0bc0351eUL, 0x166bf52aUL, 0xeb12ff82UL, 0xe3486911UL, 0xd34d7516UL,
-0x4e7b3affUL, 0x5f43671bUL, 0x9cf6e037UL, 0x4981ac83UL, 0x334266ceUL, 0x8c9341b7UL, 0xd0d854c0UL, 0xcb3a6c88UL,
-0x47bc2829UL, 0x4725ba37UL, 0xa66ad22bUL, 0x7ad61f1eUL, 0x0c5cbafaUL, 0x4437f107UL, 0xb6e79962UL, 0x42d2d816UL,
-0x0a961288UL, 0xe1a5c06eUL, 0x13749e67UL, 0x72fc081aUL, 0xb1d139f7UL, 0xf9583745UL, 0xcf19df58UL, 0xbec3f756UL,
-0xc06eba30UL, 0x07211b24UL, 0x45c28829UL, 0xc95e317fUL, 0xbc8ec511UL, 0x38bc46e9UL, 0xc6e6fa14UL, 0xbae8584aUL,
-0xad4ebc46UL, 0x468f508bUL, 0x7829435fUL, 0xf124183bUL, 0x821dba9fUL, 0xaff60ff4UL, 0xea2c4e6dUL, 0x16e39264UL,
-0x92544a8bUL, 0x009b4fc3UL, 0xaba68cedUL, 0x9ac96f78UL, 0x06a5b79aUL, 0xb2856e6eUL, 0x1aec3ca9UL, 0xbe838688UL,
-0x0e0804e9UL, 0x55f1be56UL, 0xe7e5363bUL, 0xb3a1f25dUL, 0xf7debb85UL, 0x61fe033cUL, 0x16746233UL, 0x3c034c28UL,
-0xda6d0c74UL, 0x79aac56cUL, 0x3ce4e1adUL, 0x51f0c802UL, 0x98f8f35aUL, 0x1626a49fUL, 0xeed82b29UL, 0x1d382fe3UL,
-0x0c4fb99aUL, 0xbb325778UL, 0x3ec6d97bUL, 0x6e77a6a9UL, 0xcb658b5cUL, 0xd45230c7UL, 0x2bd1408bUL, 0x60c03eb7UL,
-0xb9068d78UL, 0xa33754f4UL, 0xf430c87dUL, 0xc8a71302UL, 0xb96d8c32UL, 0xebd4e7beUL, 0xbe8b9d2dUL, 0x7979fb06UL,
-0xe7225308UL, 0x8b75cf77UL, 0x11ef8da4UL, 0xe083c858UL, 0x8d6b786fUL, 0x5a6317a6UL, 0xfa5cf7a0UL, 0x5dda0033UL,
-0xf28ebfb0UL, 0xf5b9c310UL, 0xa0eac280UL, 0x08b9767aUL, 0xa3d9d2b0UL, 0x79d34217UL, 0x021a718dUL, 0x9ac6336aUL,
-0x2711fd60UL, 0x438050e3UL, 0x069908a8UL, 0x3d7fedc4UL, 0x826d2befUL, 0x4eeb8476UL, 0x488dcf25UL, 0x36c9d566UL,
-0x28e74e41UL, 0xc2610acaUL, 0x3d49a9cfUL, 0xbae3b9dfUL, 0xb65f8de6UL, 0x92aeaf64UL, 0x3ac7d5e6UL, 0x9ea80509UL,
-0xf22b017dUL, 0xa4173f70UL, 0xdd1e16c3UL, 0x15e0d7f9UL, 0x50b1b887UL, 0x2b9f4fd5UL, 0x625aba82UL, 0x6a017962UL,
-0x2ec01b9cUL, 0x15488aa9UL, 0xd716e740UL, 0x40055a2cUL, 0x93d29a22UL, 0xe32dbf9aUL, 0x058745b9UL, 0x3453dc1eUL,
-0xd699296eUL, 0x496cff6fUL, 0x1c9f4986UL, 0xdfe2ed07UL, 0xb87242d1UL, 0x19de7eaeUL, 0x053e561aUL, 0x15ad6f8cUL,
-0x66626c1cUL, 0x7154c24cUL, 0xea082b2aUL, 0x93eb2939UL, 0x17dcb0f0UL, 0x58d4f2aeUL, 0x9ea294fbUL, 0x52cf564cUL,
-0x9883fe66UL, 0x2ec40581UL, 0x763953c3UL, 0x01d6692eUL, 0xd3a0c108UL, 0xa1e7160eUL, 0xe4f2dfa6UL, 0x693ed285UL,
-0x74904698UL, 0x4c2b0eddUL, 0x4f757656UL, 0x5d393378UL, 0xa132234fUL, 0x3d321c5dUL, 0xc3f5e194UL, 0x4b269301UL,
-0xc79f022fUL, 0x3c997e7eUL, 0x5e4f9504UL, 0x3ffafbbdUL, 0x76f7ad0eUL, 0x296693f4UL, 0x3d1fce6fUL, 0xc61e45beUL,
-0xd3b5ab34UL, 0xf72bf9b7UL, 0x1b0434c0UL, 0x4e72b567UL, 0x5592a33dUL, 0xb5229301UL, 0xcfd2a87fUL, 0x60aeb767UL,
-0x1814386bUL, 0x30bcc33dUL, 0x38a0c07dUL, 0xfd1606f2UL, 0xc363519bUL, 0x589dd390UL, 0x5479f8e6UL, 0x1cb8d647UL,
-0x97fd61a9UL, 0xea7759f4UL, 0x2d57539dUL, 0x569a58cfUL, 0xe84e63adUL, 0x462e1b78UL, 0x6580f87eUL, 0xf3817914UL,
-0x91da55f4UL, 0x40a230f3UL, 0xd1988f35UL, 0xb6e318d2UL, 0x3ffa50bcUL, 0x3d40f021UL, 0xc3c0bdaeUL, 0x4958c24cUL,
-0x518f36b2UL, 0x84b1d370UL, 0x0fedce83UL, 0x878ddadaUL, 0xf2a279c7UL, 0x94e01be8UL, 0x90716f4bUL, 0x954b8aa3UL};
-
-
-uint32_t s8[] PROGMEM = {
-0xe216300dUL, 0xbbddfffcUL, 0xa7ebdabdUL, 0x35648095UL, 0x7789f8b7UL, 0xe6c1121bUL, 0x0e241600UL, 0x052ce8b5UL,
-0x11a9cfb0UL, 0xe5952f11UL, 0xece7990aUL, 0x9386d174UL, 0x2a42931cUL, 0x76e38111UL, 0xb12def3aUL, 0x37ddddfcUL,
-0xde9adeb1UL, 0x0a0cc32cUL, 0xbe197029UL, 0x84a00940UL, 0xbb243a0fUL, 0xb4d137cfUL, 0xb44e79f0UL, 0x049eedfdUL,
-0x0b15a15dUL, 0x480d3168UL, 0x8bbbde5aUL, 0x669ded42UL, 0xc7ece831UL, 0x3f8f95e7UL, 0x72df191bUL, 0x7580330dUL,
-0x94074251UL, 0x5c7dcdfaUL, 0xabbe6d63UL, 0xaa402164UL, 0xb301d40aUL, 0x02e7d1caUL, 0x53571daeUL, 0x7a3182a2UL,
-0x12a8ddecUL, 0xfdaa335dUL, 0x176f43e8UL, 0x71fb46d4UL, 0x38129022UL, 0xce949ad4UL, 0xb84769adUL, 0x965bd862UL,
-0x82f3d055UL, 0x66fb9767UL, 0x15b80b4eUL, 0x1d5b47a0UL, 0x4cfde06fUL, 0xc28ec4b8UL, 0x57e8726eUL, 0x647a78fcUL,
-0x99865d44UL, 0x608bd593UL, 0x6c200e03UL, 0x39dc5ff6UL, 0x5d0b00a3UL, 0xae63aff2UL, 0x7e8bd632UL, 0x70108c0cUL,
-0xbbd35049UL, 0x2998df04UL, 0x980cf42aUL, 0x9b6df491UL, 0x9e7edd53UL, 0x06918548UL, 0x58cb7e07UL, 0x3b74ef2eUL,
-0x522fffb1UL, 0xd24708ccUL, 0x1c7e27cdUL, 0xa4eb215bUL, 0x3cf1d2e2UL, 0x19b47a38UL, 0x424f7618UL, 0x35856039UL,
-0x9d17dee7UL, 0x27eb35e6UL, 0xc9aff67bUL, 0x36baf5b8UL, 0x09c467cdUL, 0xc18910b1UL, 0xe11dbf7bUL, 0x06cd1af8UL,
-0x7170c608UL, 0x2d5e3354UL, 0xd4de495aUL, 0x64c6d006UL, 0xbcc0c62cUL, 0x3dd00db3UL, 0x708f8f34UL, 0x77d51b42UL,
-0x264f620fUL, 0x24b8d2bfUL, 0x15c1b79eUL, 0x46a52564UL, 0xf8d7e54eUL, 0x3e378160UL, 0x7895cda5UL, 0x859c15a5UL,
-0xe6459788UL, 0xc37bc75fUL, 0xdb07ba0cUL, 0x0676a3abUL, 0x7f229b1eUL, 0x31842e7bUL, 0x24259fd7UL, 0xf8bef472UL,
-0x835ffcb8UL, 0x6df4c1f2UL, 0x96f5b195UL, 0xfd0af0fcUL, 0xb0fe134cUL, 0xe2506d3dUL, 0x4f9b12eaUL, 0xf215f225UL,
-0xa223736fUL, 0x9fb4c428UL, 0x25d04979UL, 0x34c713f8UL, 0xc4618187UL, 0xea7a6e98UL, 0x7cd16efcUL, 0x1436876cUL,
-0xf1544107UL, 0xbedeee14UL, 0x56e9af27UL, 0xa04aa441UL, 0x3cf7c899UL, 0x92ecbae6UL, 0xdd67016dUL, 0x151682ebUL,
-0xa842eedfUL, 0xfdba60b4UL, 0xf1907b75UL, 0x20e3030fUL, 0x24d8c29eUL, 0xe139673bUL, 0xefa63fb8UL, 0x71873054UL,
-0xb6f2cf3bUL, 0x9f326442UL, 0xcb15a4ccUL, 0xb01a4504UL, 0xf1e47d8dUL, 0x844a1be5UL, 0xbae7dfdcUL, 0x42cbda70UL,
-0xcd7dae0aUL, 0x57e85b7aUL, 0xd53f5af6UL, 0x20cf4d8cUL, 0xcea4d428UL, 0x79d130a4UL, 0x3486ebfbUL, 0x33d3cddcUL,
-0x77853b53UL, 0x37effcb5UL, 0xc5068778UL, 0xe580b3e6UL, 0x4e68b8f4UL, 0xc5c8b37eUL, 0x0d809ea2UL, 0x398feb7cUL,
-0x132a4f94UL, 0x43b7950eUL, 0x2fee7d1cUL, 0x223613bdUL, 0xdd06caa2UL, 0x37df932bUL, 0xc4248289UL, 0xacf3ebc3UL,
-0x5715f6b7UL, 0xef3478ddUL, 0xf267616fUL, 0xc148cbe4UL, 0x9052815eUL, 0x5e410fabUL, 0xb48a2465UL, 0x2eda7fa4UL,
-0xe87b40e4UL, 0xe98ea084UL, 0x5889e9e1UL, 0xefd390fcUL, 0xdd07d35bUL, 0xdb485694UL, 0x38d7e5b2UL, 0x57720101UL,
-0x730edebcUL, 0x5b643113UL, 0x94917e4fUL, 0x503c2fbaUL, 0x646f1282UL, 0x7523d24aUL, 0xe0779695UL, 0xf9c17a8fUL,
-0x7a5b2121UL, 0xd187b896UL, 0x29263a4dUL, 0xba510cdfUL, 0x81f47c9fUL, 0xad1163edUL, 0xea7b5965UL, 0x1a00726eUL,
-0x11403092UL, 0x00da6d77UL, 0x4a0cdd61UL, 0xad1f4603UL, 0x605bdfb0UL, 0x9eedc364UL, 0x22ebe6a8UL, 0xcee7d28aUL,
-0xa0e736a0UL, 0x5564a6b9UL, 0x10853209UL, 0xc7eb8f37UL, 0x2de705caUL, 0x8951570fUL, 0xdf09822bUL, 0xbd691a6cUL,
-0xaa12e4f2UL, 0x87451c0fUL, 0xe0f6a27aUL, 0x3ada4819UL, 0x4cf1764fUL, 0x0d771c2bUL, 0x67cdb156UL, 0x350d8384UL,
-0x5938fa0fUL, 0x42399ef3UL, 0x36997b07UL, 0x0e84093dUL, 0x4aa93e61UL, 0x8360d87bUL, 0x1fa98b0cUL, 0x1149382cUL,
-0xe97625a5UL, 0x0614d1b7UL, 0x0e25244bUL, 0x0c768347UL, 0x589e8d82UL, 0x0d2059d1UL, 0xa466bb1eUL, 0xf8da0a82UL,
-0x04f19130UL, 0xba6e4ec0UL, 0x99265164UL, 0x1ee7230dUL, 0x50b2ad80UL, 0xeaee6801UL, 0x8db2a283UL, 0xea8bf59eUL};
-
-#else
-
-uint32_t s5[] PROGMEM = {
-0x040cc97eUL, 0xb9746e2cUL, 0xdf660e9bUL, 0x117933a6UL, 0xff7f6ab8UL, 0xf558d31dUL, 0x449ddd44UL, 0x7f163117UL,
-0xfaf1fb08UL, 0xcc11f5e7UL, 0x001b05d2UL, 0x00ba5a73UL, 0xd822b72aUL, 0xcb816338UL, 0x3a24f6acUL, 0x7afdbe69UL,
-0x7fe7a2e6UL, 0xcd20c7f0UL, 0x164849c4UL, 0x80c1f5ccUL, 0x40168538UL, 0x48a8b015UL, 0xcb188be6UL, 0xffdeaa4cUL,
-0x010a485fUL, 0xaab21204UL, 0xfc149825UL, 0xe2efd041UL, 0x8db4404eUL, 0xfbb68e24UL, 0xfe1cba8dUL, 0x029ba941UL,
-0x040a551aUL, 0xcb658fbaUL, 0xe7f45172UL, 0x2517a595UL, 0xd7ec06c1UL, 0x0a98a597UL, 0xaab939c5UL, 0x6afe794dUL,
-0x63f7f3f2UL, 0x4080af68UL, 0x569e0cedUL, 0x8b95b411UL, 0x885aebe1UL, 0xb0e60987UL, 0x5671e0d7UL, 0xa7fe294eUL,
-0x2de56663UL, 0x00c0d102UL, 0x058eacc4UL, 0x71f57793UL, 0x2a37050cUL, 0xf2358557UL, 0x02be6122UL, 0xc9a042d6UL,
-0x80a213dfUL, 0xd25bb574UL, 0xc0992168UL, 0xece521d4UL, 0xe83cfb53UL, 0xb3edadc8UL, 0xc97fa828UL, 0x8199953dUL,
-0x00f91f5cUL, 0x99d338feUL, 0x0bff4e0cUL, 0xea072406UL, 0xb14f2faaUL, 0x7669b94fUL, 0x0595c790UL, 0x74a7a8b0UL,
-0xffa155efUL, 0xc2a29ce5UL, 0x272db6a6UL, 0x63426ae6UL, 0x1f0065dfUL, 0x6609c50eUL, 0xbc55dddfUL, 0x5506de29UL,
-0x9a731e91UL, 0x7589af17UL, 0x1c91c732UL, 0x6894f889UL, 0x80e9010dUL, 0xf4554752UL, 0xc93cb603UL, 0xb244c80cUL,
-0xaaf0f3bcUL, 0xe936ac87UL, 0x26743ae5UL, 0x2bd8b301UL, 0x49749e1aUL, 0x7e2dee64UL, 0xdab1dbcdUL, 0x1049c901UL,
-0x80bf68b8UL, 0xfdf3260dUL, 0xe7ed4293UL, 0x84c2a504UL, 0xb6376763UL, 0x16b6f550UL, 0xe36647f2UL, 0xc136ca8eUL,
-0xdb056e13UL, 0x9183f1feUL, 0x377a88fbUL, 0xd4f7e7d6UL, 0xc97dfbc7UL, 0xdffc6330UL, 0xde89f5b6UL, 0xda4129ecUL,
-0x9566e426UL, 0x196456b7UL, 0xc5ef54f6UL, 0xb7588dd0UL, 0x01549248UL, 0x7fcbbac1UL, 0x0f55ffe5UL, 0x493008b6UL,
-0xe8d0b55bUL, 0x5a2ed787UL, 0xe16e6aabUL, 0xce663a22UL, 0xcdf32bc6UL, 0xf985089eUL, 0x473ecb68UL, 0x0f016c08UL,
-0x20e81da2UL, 0xde698bd1UL, 0x7757f6f3UL, 0xf6c302faUL, 0xc3da7e40UL, 0x50d5b3cbUL, 0x4d089317UL, 0xba0ed7b0UL,
-0xd578b30aUL, 0x0cfb51d9UL, 0x56dad7deUL, 0xe4bb2441UL, 0x560bca94UL, 0xd155570fUL, 0x6ee5e1e0UL, 0xbeb58461UL,
-0x9f240a58UL, 0xc04bf794UL, 0x8e8827e3UL, 0x61557b9fUL, 0x8002dcc3UL, 0x15776805UL, 0xd76b6c64UL, 0xb34d9044UL,
-0xa3f0b466UL, 0x8a64f1c0UL, 0xafd57e69UL, 0xf62fe949UL, 0x4f379e30UL, 0x6a35b62cUL, 0x73858085UL, 0x40f89149UL,
-0x02aef076UL, 0x4de83b08UL, 0x9a1c4228UL, 0x06944844UL, 0xb84c6e73UL, 0x102909c1UL, 0xc65fc98bUL, 0xf49c867dUL,
-0x6f614f13UL, 0x8d11772eUL, 0xe12b1bb3UL, 0x72b490aaUL, 0x17d7a53cUL, 0xba1b167dUL, 0x1090ad9cUL, 0xa22b46afUL,
-0xd259e49fUL, 0x5945d345UL, 0x13daf2d9UL, 0x8754c6dbUL, 0x4ef9e4f3UL, 0x6f486d17UL, 0xea137c09UL, 0xc7a51d63UL,
-0x82735f44UL, 0xf4835617UL, 0x976ac6cdUL, 0x8802be70UL, 0x72cfcdb3UL, 0xf3d25d6eUL, 0x79609320UL, 0xa5809b45UL,
-0xdbe260beUL, 0x0131c2a9UL, 0x5c31a5ebUL, 0xf2424e22UL, 0x72155c1cUL, 0x2c1b72f6UL, 0xf3ffd21aUL, 0x4e40258cUL,
-0x2fd74e32UL, 0xfdb76740UL, 0x8e132305UL, 0x78bca35cUL, 0x6ed60fdcUL, 0x83229275UL, 0x176b4d78UL, 0x6eb1eb58UL,
-0x854f0944UL, 0x871d483fUL, 0x7baefefcUL, 0x76ffb577UL, 0xbf02238cUL, 0x5675f4aaUL, 0x2ab0465fUL, 0x0128092bUL,
-0xf7f5383dUL, 0x361fa80cUL, 0x8a4aaf52UL, 0xc0e7d566UL, 0x74083bdfUL, 0x10510595UL, 0xa8d75a1bUL, 0xadd51ef6UL,
-0x79e4f66cUL, 0x84817520UL, 0x65faced0UL, 0x58bef788UL, 0x2668044aUL, 0xf3f8f60fUL, 0x707f9ca0UL, 0xa0ab4653UL,
-0x286ce95cUL, 0xa3ed76e1UL, 0x7f30ac6bUL, 0xd2296837UL, 0xa90f3685UL, 0x2afee317UL, 0x6797b724UL, 0x206ba9f5UL,
-0x9525cdd6UL, 0xbf1eff68UL, 0x2c445575UL, 0xbe069ff1UL, 0x9a65e0f9UL, 0x1d49b9eeUL, 0x18070134UL, 0xb8ca30bbUL,
-0x15fe22e8UL, 0x83095788UL, 0x49620e75UL, 0x557e62daUL, 0xa8ff765eUL, 0x464553b1UL, 0x08de476dUL, 0xd4e7e9efUL};
-
-
-uint32_t s6[] PROGMEM = {
-0x9d8ffaf6UL, 0xe16cac2cUL, 0x6748a34cUL, 0x7c7f33e2UL, 0xe708db95UL, 0xb4436801UL, 0xbc5cedecUL, 0xac535532UL,
-0x60099fbfUL, 0xede2a1dfUL, 0x9d57f083UL, 0xb986ed63UL, 0xb8a6b61aUL, 0x39be5edeUL, 0x32f78ff3UL, 0x38b18989UL,
-0x6149f133UL, 0xbd3719c0UL, 0xdac606f5UL, 0x7e5e62e4UL, 0x99ea08a3UL, 0x3ce3234eUL, 0xccd7cb79UL, 0x6743a148UL,
-0x199614a3UL, 0xd54bc9feUL, 0x4a1714a1UL, 0x6618a0eaUL, 0x2ddb84a0UL, 0x6f48a809UL, 0x4a6188a8UL, 0x98af0029UL,
-0x91596601UL, 0x632899e1UL, 0x600cf3c8UL, 0x3cef782eUL, 0x3219d5d0UL, 0x14ec0fcfUL, 0xd207caf7UL, 0x7220a8d0UL,
-0x7e1941fdUL, 0xb0a60593UL, 0xdae36be8UL, 0xcdd3be74UL, 0x3ca52d37UL, 0x48447f4cUL, 0x40d4b5daUL, 0xc30eba6dUL,
-0xa7193908UL, 0xd9eeba9fUL, 0xb0cfdb49UL, 0x530c674eUL, 0x019c3d5cUL, 0x41b9bd64UL, 0x6a630e2cUL, 0xcdd97dbaUL,
-0x88736feaUL, 0x62c70be7UL, 0xdb9af235UL, 0x8ddd4c5cUL, 0x8c8dd4f0UL, 0xe25381b8UL, 0x6698a108UL, 0xc8eae21aUL,
-0x89af4c28UL, 0x238292aaUL, 0x53be3493UL, 0xbf213a3bUL, 0xe34b4316UL, 0x0639ea9aUL, 0x6ec3e8efUL, 0xd9cd90f8UL,
-0xae6d2280UL, 0xa3a440c3UL, 0x099c7edfUL, 0x07a894a6UL, 0xcc5e7c5bUL, 0xa6b31d22UL, 0x2fa0699aUL, 0x548a8168UL,
-0x6f29b2ceUL, 0x3a84c053UL, 0x553689feUL, 0x8ae6bf25UL, 0xbc8a62b4UL, 0xbf2e22cfUL, 0x486fac25UL, 0x8793a9a9UL,
-0x65dbbd53UL, 0xe7fb6fe7UL, 0x78fd67e9UL, 0x6335a90bUL, 0xc12b348eUL, 0xe91ba1e8UL, 0x0d748049UL, 0xfc7d08c8UL,
-0x99bfe48dUL, 0xa00111a1UL, 0x7579d37fUL, 0xc0265adaUL, 0x4f991fe8UL, 0x89cd2895UL, 0xed9f33fdUL, 0xbf3478b8UL,
-0x6d45045fUL, 0x98862522UL, 0x3bc8c4c9UL, 0xbe56c12dUL, 0xaa8d624fUL, 0xc55ef557UL, 0xbe0a22e2UL, 0xbf6e91d2UL,
-0x955bc74eUL, 0xc0c3f224UL, 0x995dd142UL, 0xa07f0dcdUL, 0xff276e7bUL, 0xf08adca8UL, 0x06c14573UL, 0x2f231ef4UL,
-0x86231635UL, 0x2689eae6UL, 0x94b03333UL, 0xf2c67e15UL, 0xaf742b37UL, 0xe4732569UL, 0x48d8a9e9UL, 0x890216f3UL,
-0x1def623aUL, 0x38e287a7UL, 0x76f6a5f3UL, 0x53483674UL, 0x63109520UL, 0x8d697645UL, 0x07d4fab6UL, 0x50f92a59UL,
-0x2335f736UL, 0x876efb4cUL, 0xc0cea47dUL, 0xaa2d156cUL, 0xa89603cbUL, 0x5dfe0dc5UL, 0xab07d7fcUL, 0x2fc42109UL,
-0xbbf0df89UL, 0x78bee25fUL, 0x334f8f44UL, 0xc9134675UL, 0x8dd0052bUL, 0x85d5b948UL, 0x419404dcUL, 0x9b8f09c8UL,
-0x86e7ed7dUL, 0x73339ac3UL, 0x05004142UL, 0x5117096aUL, 0xa6c8f30eUL, 0xd6720089UL, 0x82762028UL, 0xbef7a9a9UL,
-0x9d6732bfUL, 0x755b5bd4UL, 0x00fd53b3UL, 0x58e3b0cbUL, 0x0a220f83UL, 0x14b28f1fUL, 0x08cf72d3UL, 0x134a3cccUL,
-0x6631f68cUL, 0xbe871c06UL, 0x888fc988UL, 0x97e36260UL, 0x7a8ecf47UL, 0x8352c8b6UL, 0xfbacc23cUL, 0x7669c03fUL,
-0x52028f4eUL, 0x4d31d864UL, 0xe37038daUL, 0x5954661eUL, 0xf00809c1UL, 0xa5213051UL, 0xb7685b6cUL, 0xa08a2f82UL,
-0x3ecd0730UL, 0xef9e7174UL, 0x812687dcUL, 0xd4403307UL, 0xd92f437eUL, 0x41c25e0cUL, 0x6c280988UL, 0x91d892f5UL,
-0xf630a908UL, 0x05f37e95UL, 0xbdfffbb7UL, 0x6fe966c2UL, 0x98ace46fUL, 0xc0ec73b1UL, 0x2ab460bcUL, 0xda983495UL,
-0x12aea1fbUL, 0x36d74b2dUL, 0xabfa250fUL, 0xebfcf3a4UL, 0x239196e2UL, 0x3d0c7f25UL, 0x49af4893UL, 0xbc001436UL,
-0x4a6f81e8UL, 0x00f21438UL, 0x4340f9a3UL, 0xc2547a9cUL, 0x574f70bcUL, 0xf9e741daUL, 0x3ad35ac2UL, 0x84a0f454UL,
-0x05557fb1UL, 0xbe7c3559UL, 0xc815bdedUL, 0xabc5977fUL, 0xb5c75abaUL, 0xafdef6b6UL, 0x3a9c473aUL, 0x25da0253UL,
-0x6a7e3d65UL, 0x498d2654UL, 0xea77a451UL, 0x5bd51750UL, 0x885dd2d7UL, 0x766c1344UL, 0xc8a80404UL, 0x21a1e5b8UL,
-0x8a921ab8UL, 0x6958ed60UL, 0x965bc597UL, 0x1b99eceaUL, 0x13599329UL, 0xf1b7fd01UL, 0xfa8d8e08UL, 0xf5f6b69aUL,
-0x9fbf4c3bUL, 0xabe35d4aUL, 0x351d05e6UL, 0x55d8e1a0UL, 0xf14c6bd3UL, 0xebed44f5UL, 0x2435e9b0UL, 0xbd8fbbbeUL,
-0xcf62d7a2UL, 0x542fc949UL, 0x31f3b538UL, 0x54a42871UL, 0x05293948UL, 0xb81d5ba6UL, 0xbd971c85UL, 0x2fcf75d6UL};
-
-
-uint32_t s7[] PROGMEM = {
-0x1940e085UL, 0x67f52b33UL, 0xffbf2d66UL, 0x9356c6cfUL, 0x6f7f8d2aUL, 0x12c99babUL, 0xa10860deUL, 0x1fda2820UL,
-0xe7bc2702UL, 0x1629644dUL, 0x00c3fa18UL, 0x828bf150UL, 0x11cbb22cUL, 0x5ce732b2UL, 0xf295364bUL, 0xde0787b2UL,
-0xf6bc5fa0UL, 0xe98141cdUL, 0x0c2150e1UL, 0xbdf14ee2UL, 0x81c368b1UL, 0x89e7e4fdUL, 0xd8b0795cUL, 0x43fd8b1eUL,
-0x0150494dUL, 0x4143be38UL, 0x1dee3c91UL, 0x3f9ca792UL, 0xbe669708UL, 0xf4adeebaUL, 0xcfbe8612UL, 0x19cbeab6UL,
-0x00c26026UL, 0xe4bd6575UL, 0x7a1f2464UL, 0xa9dc4882UL, 0x66adb3c3UL, 0x86601328UL, 0xa8dfd80bUL, 0xf21c6d35UL,
-0xbe897710UL, 0xcee9b2b3UL, 0x8faa0205UL, 0x1e35c00bUL, 0x2af56b16UL, 0x82ff12ebUL, 0x116948e3UL, 0x16754dd3UL,
-0xff3a7b4eUL, 0x1b67435fUL, 0x37e0f69cUL, 0x83ac8149UL, 0xce664233UL, 0xb741938cUL, 0xc054d8d0UL, 0x886c3acbUL,
-0x2928bc47UL, 0x37ba2547UL, 0x2bd26aa6UL, 0x1e1fd67aUL, 0xfaba5c0cUL, 0x07f13744UL, 0x6299e7b6UL, 0x16d8d242UL,
-0x8812960aUL, 0x6ec0a5e1UL, 0x679e7413UL, 0x1a08fc72UL, 0xf739d1b1UL, 0x453758f9UL, 0x58df19cfUL, 0x56f7c3beUL,
-0x30ba6ec0UL, 0x241b2107UL, 0x2988c245UL, 0x7f315ec9UL, 0x11c58ebcUL, 0xe946bc38UL, 0x14fae6c6UL, 0x4a58e8baUL,
-0x46bc4eadUL, 0x8b508f46UL, 0x5f432978UL, 0x3b1824f1UL, 0x9fba1d82UL, 0xf40ff6afUL, 0x6d4e2ceaUL, 0x6492e316UL,
-0x8b4a5492UL, 0xc34f9b00UL, 0xed8ca6abUL, 0x786fc99aUL, 0x9ab7a506UL, 0x6e6e85b2UL, 0xa93cec1aUL, 0x888683beUL,
-0xe904080eUL, 0x56bef155UL, 0x3b36e5e7UL, 0x5df2a1b3UL, 0x85bbdef7UL, 0x3c03fe61UL, 0x33627416UL, 0x284c033cUL,
-0x740c6ddaUL, 0x6cc5aa79UL, 0xade1e43cUL, 0x02c8f051UL, 0x5af3f898UL, 0x9fa42616UL, 0x292bd8eeUL, 0xe32f381dUL,
-0x9ab94f0cUL, 0x785732bbUL, 0x7bd9c63eUL, 0xa9a6776eUL, 0x5c8b65cbUL, 0xc73052d4UL, 0x8b40d12bUL, 0xb73ec060UL,
-0x788d06b9UL, 0xf45437a3UL, 0x7dc830f4UL, 0x0213a7c8UL, 0x328c6db9UL, 0xbee7d4ebUL, 0x2d9d8bbeUL, 0x06fb7979UL,
-0x085322e7UL, 0x77cf758bUL, 0xa48def11UL, 0x58c883e0UL, 0x6f786b8dUL, 0xa617635aUL, 0xa0f75cfaUL, 0x3300da5dUL,
-0xb0bf8ef2UL, 0x10c3b9f5UL, 0x80c2eaa0UL, 0x7a76b908UL, 0xb0d2d9a3UL, 0x1742d379UL, 0x8d711a02UL, 0x6a33c69aUL,
-0x60fd1127UL, 0xe3508043UL, 0xa8089906UL, 0xc4ed7f3dUL, 0xef2b6d82UL, 0x7684eb4eUL, 0x25cf8d48UL, 0x66d5c936UL,
-0x414ee728UL, 0xca0a61c2UL, 0xcfa9493dUL, 0xdfb9e3baUL, 0xe68d5fb6UL, 0x64afae92UL, 0xe6d5c73aUL, 0x0905a89eUL,
-0x7d012bf2UL, 0x703f17a4UL, 0xc3161eddUL, 0xf9d7e015UL, 0x87b8b150UL, 0xd54f9f2bUL, 0x82ba5a62UL, 0x6279016aUL,
-0x9c1bc02eUL, 0xa98a4815UL, 0x40e716d7UL, 0x2c5a0540UL, 0x229ad293UL, 0x9abf2de3UL, 0xb9458705UL, 0x1edc5334UL,
-0x6e2999d6UL, 0x6fff6c49UL, 0x86499f1cUL, 0x07ede2dfUL, 0xd14272b8UL, 0xae7ede19UL, 0x1a563e05UL, 0x8c6fad15UL,
-0x1c6c6266UL, 0x4cc25471UL, 0x2a2b08eaUL, 0x3929eb93UL, 0xf0b0dc17UL, 0xaef2d458UL, 0xfb94a29eUL, 0x4c56cf52UL,
-0x66fe8398UL, 0x8105c42eUL, 0xc3533976UL, 0x2e69d601UL, 0x08c1a0d3UL, 0x0e16e7a1UL, 0xa6dff2e4UL, 0x85d23e69UL,
-0x98469074UL, 0xdd0e2b4cUL, 0x5676754fUL, 0x7833395dUL, 0x4f2332a1UL, 0x5d1c323dUL, 0x94e1f5c3UL, 0x0193264bUL,
-0x2f029fc7UL, 0x7e7e993cUL, 0x04954f5eUL, 0xbdfbfa3fUL, 0x0eadf776UL, 0xf4936629UL, 0x6fce1f3dUL, 0xbe451ec6UL,
-0x34abb5d3UL, 0xb7f92bf7UL, 0xc034041bUL, 0x67b5724eUL, 0x3da39255UL, 0x019322b5UL, 0x7fa8d2cfUL, 0x67b7ae60UL,
-0x6b381418UL, 0x3dc3bc30UL, 0x7dc0a038UL, 0xf20616fdUL, 0x9b5163c3UL, 0x90d39d58UL, 0xe6f87954UL, 0x47d6b81cUL,
-0xa961fd97UL, 0xf45977eaUL, 0x9d53572dUL, 0xcf589a56UL, 0xad634ee8UL, 0x781b2e46UL, 0x7ef88065UL, 0x147981f3UL,
-0xf455da91UL, 0xf330a240UL, 0x358f98d1UL, 0xd218e3b6UL, 0xbc50fa3fUL, 0x21f0403dUL, 0xaebdc0c3UL, 0x4cc25849UL,
-0xb2368f51UL, 0x70d3b184UL, 0x83ceed0fUL, 0xdada8d87UL, 0xc779a2f2UL, 0xe81be094UL, 0x4b6f7190UL, 0xa38a4b95UL};
-
-
-uint32_t s8[] PROGMEM = {
-0x0d3016e2UL, 0xfcffddbbUL, 0xbddaeba7UL, 0x95806435UL, 0xb7f88977UL, 0x1b12c1e6UL, 0x0016240eUL, 0xb5e82c05UL,
-0xb0cfa911UL, 0x112f95e5UL, 0x0a99e7ecUL, 0x74d18693UL, 0x1c93422aUL, 0x1181e376UL, 0x3aef2db1UL, 0xfcdddd37UL,
-0xb1de9adeUL, 0x2cc30c0aUL, 0x297019beUL, 0x4009a084UL, 0x0f3a24bbUL, 0xcf37d1b4UL, 0xf0794eb4UL, 0xfded9e04UL,
-0x5da1150bUL, 0x68310d48UL, 0x5adebb8bUL, 0x42ed9d66UL, 0x31e8ecc7UL, 0xe7958f3fUL, 0x1b19df72UL, 0x0d338075UL,
-0x51420794UL, 0xfacd7d5cUL, 0x636dbeabUL, 0x642140aaUL, 0x0ad401b3UL, 0xcad1e702UL, 0xae1d5753UL, 0xa282317aUL,
-0xecdda812UL, 0x5d33aafdUL, 0xe8436f17UL, 0xd446fb71UL, 0x22901238UL, 0xd49a94ceUL, 0xad6947b8UL, 0x62d85b96UL,
-0x55d0f382UL, 0x6797fb66UL, 0x4e0bb815UL, 0xa0475b1dUL, 0x6fe0fd4cUL, 0xb8c48ec2UL, 0x6e72e857UL, 0xfc787a64UL,
-0x445d8699UL, 0x93d58b60UL, 0x030e206cUL, 0xf65fdc39UL, 0xa3000b5dUL, 0xf2af63aeUL, 0x32d68b7eUL, 0x0c8c1070UL,
-0x4950d3bbUL, 0x04df9829UL, 0x2af40c98UL, 0x91f46d9bUL, 0x53dd7e9eUL, 0x48859106UL, 0x077ecb58UL, 0x2eef743bUL,
-0xb1ff2f52UL, 0xcc0847d2UL, 0xcd277e1cUL, 0x5b21eba4UL, 0xe2d2f13cUL, 0x387ab419UL, 0x18764f42UL, 0x39608535UL,
-0xe7de179dUL, 0xe635eb27UL, 0x7bf6afc9UL, 0xb8f5ba36UL, 0xcd67c409UL, 0xb11089c1UL, 0x7bbf1de1UL, 0xf81acd06UL,
-0x08c67071UL, 0x54335e2dUL, 0x5a49ded4UL, 0x06d0c664UL, 0x2cc6c0bcUL, 0xb30dd03dUL, 0x348f8f70UL, 0x421bd577UL,
-0x0f624f26UL, 0xbfd2b824UL, 0x9eb7c115UL, 0x6425a546UL, 0x4ee5d7f8UL, 0x6081373eUL, 0xa5cd9578UL, 0xa5159c85UL,
-0x889745e6UL, 0x5fc77bc3UL, 0x0cba07dbUL, 0xaba37606UL, 0x1e9b227fUL, 0x7b2e8431UL, 0xd79f2524UL, 0x72f4bef8UL,
-0xb8fc5f83UL, 0xf2c1f46dUL, 0x95b1f596UL, 0xfcf00afdUL, 0x4c13feb0UL, 0x3d6d50e2UL, 0xea129b4fUL, 0x25f215f2UL,
-0x6f7323a2UL, 0x28c4b49fUL, 0x7949d025UL, 0xf813c734UL, 0x878161c4UL, 0x986e7aeaUL, 0xfc6ed17cUL, 0x6c873614UL,
-0x074154f1UL, 0x14eedebeUL, 0x27afe956UL, 0x41a44aa0UL, 0x99c8f73cUL, 0xe6baec92UL, 0x6d0167ddUL, 0xeb821615UL,
-0xdfee42a8UL, 0xb460bafdUL, 0x757b90f1UL, 0x0f03e320UL, 0x9ec2d824UL, 0x3b6739e1UL, 0xb83fa6efUL, 0x54308771UL,
-0x3bcff2b6UL, 0x4264329fUL, 0xcca415cbUL, 0x04451ab0UL, 0x8d7de4f1UL, 0xe51b4a84UL, 0xdcdfe7baUL, 0x70dacb42UL,
-0x0aae7dcdUL, 0x7a5be857UL, 0xf65a3fd5UL, 0x8c4dcf20UL, 0x28d4a4ceUL, 0xa430d179UL, 0xfbeb8634UL, 0xdccdd333UL,
-0x533b8577UL, 0xb5fcef37UL, 0x788706c5UL, 0xe6b380e5UL, 0xf4b8684eUL, 0x7eb3c8c5UL, 0xa29e800dUL, 0x7ceb8f39UL,
-0x944f2a13UL, 0x0e95b743UL, 0x1c7dee2fUL, 0xbd133622UL, 0xa2ca06ddUL, 0x2b93df37UL, 0x898224c4UL, 0xc3ebf3acUL,
-0xb7f61557UL, 0xdd7834efUL, 0x6f6167f2UL, 0xe4cb48c1UL, 0x5e815290UL, 0xab0f415eUL, 0x65248ab4UL, 0xa47fda2eUL,
-0xe4407be8UL, 0x84a08ee9UL, 0xe1e98958UL, 0xfc90d3efUL, 0x5bd307ddUL, 0x945648dbUL, 0xb2e5d738UL, 0x01017257UL,
-0xbcde0e73UL, 0x1331645bUL, 0x4f7e9194UL, 0xba2f3c50UL, 0x82126f64UL, 0x4ad22375UL, 0x959677e0UL, 0x8f7ac1f9UL,
-0x21215b7aUL, 0x96b887d1UL, 0x4d3a2629UL, 0xdf0c51baUL, 0x9f7cf481UL, 0xed6311adUL, 0x65597beaUL, 0x6e72001aUL,
-0x92304011UL, 0x776dda00UL, 0x61dd0c4aUL, 0x03461fadUL, 0xb0df5b60UL, 0x64c3ed9eUL, 0xa8e6eb22UL, 0x8ad2e7ceUL,
-0xa036e7a0UL, 0xb9a66455UL, 0x09328510UL, 0x378febc7UL, 0xca05e72dUL, 0x0f575189UL, 0x2b8209dfUL, 0x6c1a69bdUL,
-0xf2e412aaUL, 0x0f1c4587UL, 0x7aa2f6e0UL, 0x1948da3aUL, 0x4f76f14cUL, 0x2b1c770dUL, 0x56b1cd67UL, 0x84830d35UL,
-0x0ffa3859UL, 0xf39e3942UL, 0x077b9936UL, 0x3d09840eUL, 0x613ea94aUL, 0x7bd86083UL, 0x0c8ba91fUL, 0x2c384911UL,
-0xa52576e9UL, 0xb7d11406UL, 0x4b24250eUL, 0x4783760cUL, 0x828d9e58UL, 0xd159200dUL, 0x1ebb66a4UL, 0x820adaf8UL,
-0x3091f104UL, 0xc04e6ebaUL, 0x64512699UL, 0x0d23e71eUL, 0x80adb250UL, 0x0168eeeaUL, 0x83a2b28dUL, 0x9ef58beaUL};
-
-
-#endif 
-
-
-#endif
-
diff --git a/cast5.c b/cast5.c
deleted file mode 100644
index 6d7f42f..0000000
--- a/cast5.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/* cast5.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * \file	cast5.c
- * \author	Daniel Otte
- * \email       daniel.otte@rub.de
- * \date 	2006-07-26
- * \par License:
- *  GPLv3 or later
- * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
- * 
- */
- 
- #include <stdint.h>
- #include <string.h>
- #include "cast5.h"
- #include "config.h"
- #include "debug.h"
- 
- #undef DEBUG
- 
- #ifdef DEBUG
-  #include "cli.h"
- #endif
- 
-#include "cast5-sbox.h"
-
-
- 
-#define S5(x) pgm_read_dword(&s5[(x)]) /* 32-bit entry x of table s5, fetched with pgm_read_dword */
-#define S6(x) pgm_read_dword(&s6[(x)]) /* same for table s6 */
-#define S7(x) pgm_read_dword(&s7[(x)]) /* same for table s7 */
-#define S8(x) pgm_read_dword(&s8[(x)]) /* same for table s8 */
-
-static /* file-local key-schedule step: derives the 16 bytes at dest from the 16 bytes at src */
-void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){ /* bmode: false for the steps cast5_init labels "A", true for those labelled "B" */
-	uint8_t mask = bmode?0x8:0; /* XOR-ed into every src byte index; 0x8 swaps the two 8-byte halves of src */
-	*((uint32_t*)(&dest[0x0])) = *((uint32_t*)(&src[0x0^mask])) /* word 0: built from src only */
-                                     ^ S5(src[0xD^mask]) ^ S6(src[0xF^mask]) 
-                                     ^ S7(src[0xC^mask]) ^ S8(src[0xE^mask]) 
-                                     ^ S7(src[0x8^mask]);
-	*((uint32_t*)(&dest[0x4])) = *((uint32_t*)(&src[0x8^mask])) /* word 1: also reads dest bytes 0..3 written just above */
-                                     ^ S5(dest[0x0]) ^ S6(dest[0x2]) 
-                                     ^ S7(dest[0x1]) ^ S8(dest[0x3]) 
-                                     ^ S8(src[0xA^mask]);
-	*((uint32_t*)(&dest[0x8])) = *((uint32_t*)(&src[0xC^mask])) /* word 2: also reads dest bytes 4..7 */
-                                     ^ S5(dest[0x7]) ^ S6(dest[0x6]) 
-                                     ^ S7(dest[0x5]) ^ S8(dest[0x4]) 
-                                     ^ S5(src[0x9^mask]);
-	*((uint32_t*)(&dest[0xC])) = *((uint32_t*)(&src[0x4^mask])) /* word 3: also reads dest bytes 8..0xB */
-                                     ^ S5(dest[0xA]) 
-                                     ^ S6(dest[0x9]) 
-                                     ^ S7(dest[0xB]) 
-                                     ^ S8(dest[0x8]) 
-                                     ^ S6(src[0xB^mask]);
-}
-
-static /* file-local key-schedule step: writes four 32-bit subkey words to dest from S5..S8 lookups on bytes of src */
-void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){ /* (nmode,xmode): cast5_init uses (0,0) for "M", (1,0) for "N", (1,1) for "N'", (0,1) for "M'" */
-	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 
-                         0xF, 0xE, 0xD, 0xC, 
-                         0x3, 0x2, 0x1, 0x0, 
-                         0x7, 0x6, 0x5, 0x4}; /* nmode table: src byte index used in place of x when nmode is true */
-	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, /* src byte index for the fifth lookup of each word; row = (xmode<<1)+nmode */
-                             {0x8, 0xD, 0x3, 0x7}, 
-                             {0x3, 0x7, 0x8, 0xD}, 
-                             {0x9, 0xC, 0x2, 0x6}};
-	#define NMT(x) (src[nmode?nmt[(x)]:(x)]) /* stays defined after this function; cast5_init_rM reuses it */
-	#define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]]) /* stays defined after this function; cast5_init_rM reuses it */
-	*((uint32_t*)(&dest[0x0])) = S5(NMT(0x8)) ^ S6(NMT(0x9)) ^ S7(NMT(0x7)) ^ S8(NMT(0x6)) ^ S5(XMT(0)); /* subkey word 0 */
-	*((uint32_t*)(&dest[0x4])) = S5(NMT(0xA)) ^ S6(NMT(0xB)) ^ S7(NMT(0x5)) ^ S8(NMT(0x4)) ^ S6(XMT(1)); /* subkey word 1 */
-	*((uint32_t*)(&dest[0x8])) = S5(NMT(0xC)) ^ S6(NMT(0xD)) ^ S7(NMT(0x3)) ^ S8(NMT(0x2)) ^ S7(XMT(2)); /* subkey word 2 */
-	*((uint32_t*)(&dest[0xC])) = S5(NMT(0xE)) ^ S6(NMT(0xF)) ^ S7(NMT(0x1)) ^ S8(NMT(0x0)) ^ S8(XMT(3)); /* subkey word 3 */
-}
-
-#define S5B(x) pgm_read_byte(3+(uint8_t*)(&s5[(x)])) /* only the byte at address offset 3 of entry x of s5; presumably the byte holding the rotation-key bits - confirm against the table layout in cast5-sbox.h */
-#define S6B(x) pgm_read_byte(3+(uint8_t*)(&s6[(x)])) /* same for table s6 */
-#define S7B(x) pgm_read_byte(3+(uint8_t*)(&s7[(x)])) /* same for table s7 */
-#define S8B(x) pgm_read_byte(3+(uint8_t*)(&s8[(x)])) /* same for table s8 */
-
-static /* file-local key-schedule step: same lookup pattern as cast5_init_M, but keeps only 5 bits of each of the four results and packs them into klo/khi */
-void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){ /* offset (0..3 in cast5_init) selects the group of four keys; bits are OR-ed in, so klo/khi must be cleared beforehand */
-	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
-	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}}; /* same values as in cast5_init_M; read through the NMT/XMT macros defined there */
-	uint8_t t, h=0; /* t: current 8-bit lookup result; h: collects bit 4 of every t */
-	t = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
-		klo[offset*2] |= (t & 0x0f); /* key 0 of the group: low nibble of klo[2*offset] */
-		h |= (t&0x10); h>>=1; /* take bit 4, then shift; after all four steps the bits sit in h bits 0..3, key 0 in bit 0 */
-	t = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
-		klo[offset*2] |= (t<<4) & 0xf0; /* key 1: high nibble of klo[2*offset] */
-		h |= t&0x10; h>>=1;
-	t = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
-		klo[offset*2+1] |= t&0xf; /* key 2: low nibble of klo[2*offset+1] */
-		h |= t&0x10; h>>=1;
-	t = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
-		klo[offset*2+1] |= t<<4; /* key 3: high nibble of klo[2*offset+1]; the uint8_t store drops the bits above bit 7 */
-		h |= t&0x10; h >>=1;
-	#ifdef DEBUG
-		cli_putstr("\r\n\t h="); cli_hexdump(&h,1);
-	#endif
-	khi[offset>>1] |= h<<((offset&0x1)?4:0); /* odd offset: high nibble, even offset: low nibble of khi[offset/2] */
-}
-
-#define S_5X(s) pgm_read_dword(&s5[BPX[(s)]]) /* s5 entry indexed by byte s of BPX; BPX is #defined inside cast5_init further down */
-#define S_6X(s) pgm_read_dword(&s6[BPX[(s)]]) /* NOTE(review): none of the S_nX / S_nZ macros is referenced in cast5.c */
-#define S_7X(s) pgm_read_dword(&s7[BPX[(s)]])
-#define S_8X(s) pgm_read_dword(&s8[BPX[(s)]])
-
-#define S_5Z(s) pgm_read_dword(&s5[BPZ[(s)]]) /* same pattern, indexed by byte s of BPZ */
-#define S_6Z(s) pgm_read_dword(&s6[BPZ[(s)]])
-#define S_7Z(s) pgm_read_dword(&s7[BPZ[(s)]])
-#define S_8Z(s) pgm_read_dword(&s8[BPZ[(s)]])
-
-
-
-
-void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s){ /* fills *s (mask, rotl, roth, shortkey) from a key of keylength_b bits */
- 	 /* we might return whether the key is valid and whether setup was successful */
-	uint32_t x[4], z[4]; /* two 16-byte work buffers that the A/B steps ping-pong between */
-	#define BPX ((uint8_t*)&(x[0]))
-	#define BPZ ((uint8_t*)&(z[0]))
-	s->shortkey = (keylength_b<=80); /* cast5_enc/cast5_dec run 12 rounds when this is set, 16 otherwise */
-	/* little endian only! */
-	memset(&(x[0]), 0 ,16); /* set x to zero */
-	if(keylength_b > 128) /* longer keys are silently truncated to 128 bits */
-		keylength_b=128;
-	memcpy(&(x[0]), key, (keylength_b+7)/8); /* copy the key rounded up to whole bytes; the rest of x stays zero */
-	
-
-	/* todo: merge a and b and compress the whole stuff */
-	/***** A *****/
-	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);	
-	/***** M *****/
-	cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false); /* writes mask[0..3] */
-	/***** B *****/
-	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
-	/***** N *****/
-	cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false); /* writes mask[4..7] */
-	/***** A *****/
-	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
-	/***** N' *****/
-	cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true); /* writes mask[8..11] */
-	/***** B *****/
-	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
-	/***** M' *****/
-	cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true); /* writes mask[12..15] */
-	
-	/* those were the masking keys, now the rotation keys */
-	/* set the keys to zero */
-	memset(&(s->rotl[0]),0,8); /* cast5_init_rM only ORs bits in, so both arrays must start out cleared */
-	s->roth[0]=s->roth[1]=0;
-	/***** A *****/
-	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
-	/***** M *****/
-	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false); /* rotation keys 0..3 */
-	/***** B *****/
-	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
-	/***** N *****/
-	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false); /* rotation keys 4..7 */
-	/***** A *****/
-	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
-	/***** N' *****/
-	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true); /* rotation keys 8..11 */
-	/***** B *****/
-	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
-	/***** M' *****/
-	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true); /* rotation keys 12..15 */
-	/* done ;-) */
-}
-
-
-
-/********************************************************************************************************/
-
-#define ROTL32(a,n) ((a)<<(n) | (a)>>(32-(n))) /* rotate a left by n bits; NOTE(review): n==0 yields a right shift by 32, which ISO C leaves undefined for a 32-bit operand - confirm the target compiler copes */
-#define CHANGE_ENDIAN32(x) ((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8 ) /* reverses the byte order of a 32-bit value; x is evaluated four times */
-
-typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t); /* signature shared by cast5_f1..cast5_f3: (data, masking key, rotation count) */
-
-#define IA 3 /* IA..ID: byte offsets into the in-memory 32-bit value t that cast5_f1..cast5_f3 use as s1..s4 indices */
-#define IB 2
-#define IC 1
-#define ID 0
-
-static /* round function 1: t = ROTL32(d + m, r); result = ((s1[Ia] ^ s2[Ib]) - s3[Ic]) + s4[Id] */
-uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){ /* d: data word, m: masking key, r: rotation count */
-	uint32_t t;
-	t = ROTL32((d + m),r);
-#ifdef DEBUG /* DEBUG is #undef'd near the top of this file, so this tracing variant is compiled out */
-	uint32_t ia,ib,ic,id;
-	cli_putstr("\r\n f1("); cli_hexdump(&d, 4); cli_putc(',');
-		cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
-		cli_hexdump(&t, 4);
-	ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
-	ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
-	ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
-	id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
-	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
-	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
-	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
-	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
-
-	return (((ia ^ ib) - ic) + id);
-
-#else
-	
-	return (((  pgm_read_dword(&s1[((uint8_t*)&t)[IA]]) /* each byte of t indexes one of the tables s1..s4 */
-                  ^ pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) ) 
-		  - pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) ) 
-                  + pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
-
-#endif
-}
-
-static /* round function 2: t = ROTL32(d ^ m, r); result = ((s1[Ia] - s2[Ib]) + s3[Ic]) ^ s4[Id] */
-uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){ /* d: data word, m: masking key, r: rotation count */
-	uint32_t t;
-	t = ROTL32((d ^ m),r);
-#ifdef DEBUG /* DEBUG is #undef'd near the top of this file, so this tracing variant is compiled out */
-	uint32_t ia,ib,ic,id;
-	cli_putstr("\r\n f2("); cli_hexdump(&d, 4); cli_putc(',');
-		cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
-		cli_hexdump(&t, 4);
-
-	ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
-	ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
-	ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
-	id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
-	
-	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
-	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
-	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
-	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
-
-	return (((ia - ib) + ic) ^ id);
-#else
-	
-	return (((    pgm_read_dword(&s1[((uint8_t*)&t)[IA]]) /* each byte of t indexes one of the tables s1..s4 */
-	            - pgm_read_dword(&s2[((uint8_t*)&t)[IB]]) ) 
-		    + pgm_read_dword(&s3[((uint8_t*)&t)[IC]]) ) 
-		    ^ pgm_read_dword(&s4[((uint8_t*)&t)[ID]]) );
-
-#endif
-}
-
-static /* round function 3: t = ROTL32(m - d, r); result = ((s1[Ia] + s2[Ib]) ^ s3[Ic]) - s4[Id] */
-uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){ /* d: data word, m: masking key, r: rotation count */
-	uint32_t t;
-	t = ROTL32((m - d),r); /* note the operand order: key minus data */
-
-#ifdef DEBUG /* DEBUG is #undef'd near the top of this file, so this tracing variant is compiled out */
-	uint32_t ia,ib,ic,id;
-
-	cli_putstr("\r\n f3("); cli_hexdump(&d, 4); cli_putc(',');
-		cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
-		cli_hexdump(&t, 4);
-
-	ia = pgm_read_dword(&s1[((uint8_t*)&t)[IA]] );
-	ib = pgm_read_dword(&s2[((uint8_t*)&t)[IB]] );
-	ic = pgm_read_dword(&s3[((uint8_t*)&t)[IC]] );
-	id = pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
-	
-	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
-	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
-	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
-	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
-	return (((ia + ib) ^ ic) - id);
-#else
-	return ((  pgm_read_dword(&s1[((uint8_t*)&t)[IA]] ) /* each byte of t indexes one of the tables s1..s4 */
-                 + pgm_read_dword(&s2[((uint8_t*)&t)[IB]] )) 
-		 ^ pgm_read_dword(&s3[((uint8_t*)&t)[IC]] )) 
-                 - pgm_read_dword(&s4[((uint8_t*)&t)[ID]] );
-
-#endif
-}
-
-/******************************************************************************/
-
-void cast5_enc(void* block, const cast5_ctx_t *s){ /* encrypts the 8 bytes at block in place using the key schedule in *s */
-	uint32_t l,r, x, y; /* l, r: the two 32-bit halves; x: saved r; y: round function output */
-	uint8_t i;
-	cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3}; /* round i uses f[i%3] */
-	l=((uint32_t*)block)[0];
-	r=((uint32_t*)block)[1];
-//	cli_putstr("\r\n round[-1] = ");
-//	cli_hexdump(&r, 4);
-	for (i=0;i<(s->shortkey?12:16);++i){ /* 12 rounds for short keys, 16 otherwise */
-		x = r;
-		y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), /* data and masking key are byte-swapped before the round function */
-			(((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) /* bit 4 of rotation key i, one bit per round in roth */
-			 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) ); /* bits 0..3 of rotation key i, one nibble per round in rotl */
-		r = l ^ CHANGE_ENDIAN32(y);
-//		cli_putstr("\r\n round["); DEBUG_B(i); cli_putstr("] = ");
-//		cli_hexdump(&r, 4);
-		l = x;
-	}
-	((uint32_t*)block)[0]=r; /* halves are written back in swapped order */
-	((uint32_t*)block)[1]=l;
-}
-
-/******************************************************************************/
-
-void cast5_dec(void* block, const cast5_ctx_t *s){ /* decrypts the 8 bytes at block in place; same round as cast5_enc with the subkeys taken in reverse order */
-	uint32_t l,r, x, y; /* l, r: the two 32-bit halves; x: saved r; y: round function output */
-	int8_t i, rounds; /* i is signed so that the i>=0 loop condition can become false */
-	cast5_f_t* f[]={cast5_f1,cast5_f2,cast5_f3}; /* round i uses f[i%3] */
-	l=((uint32_t*)block)[0];
-	r=((uint32_t*)block)[1];
-	rounds = (s->shortkey?12:16); /* 12 rounds for short keys, 16 otherwise */
-	for (i=rounds-1; i>=0 ;--i){
-		x = r;
-		y = (f[i%3])(CHANGE_ENDIAN32(r), CHANGE_ENDIAN32(s->mask[i]), /* data and masking key are byte-swapped before the round function */
-			(((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00) /* bit 4 of rotation key i, one bit per round in roth */
-			 + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f) ); /* bits 0..3 of rotation key i, one nibble per round in rotl */
-		r = l ^ CHANGE_ENDIAN32(y);
-		l = x;
-	}
-	((uint32_t*)block)[0]=r; /* halves are written back in swapped order */
-	((uint32_t*)block)[1]=l;
-}
-
-
-/******************************************************************************/
-
-
-
-
diff --git a/cast5.h b/cast5.h
deleted file mode 100644
index b014f7c..0000000
--- a/cast5.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* cast5.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/** 
- * \file	cast5.h
- * \author	Daniel Otte
- * \date 	2006-07-26
- * \license GPLv3 or later
- * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
- * 
- */
-
-#ifndef CAST5_H_
-#define CAST5_H_ 
-
-#include <stdint.h> 
-
-#ifndef BOOL /* the typedef below is emitted only if none of BOOL, __BOOL, __BOOL__ is defined yet */
-#define BOOL
- #ifndef __BOOL
- #define __BOOL
-  #ifndef __BOOL__
-  #define __BOOL__
-	typedef enum{false=0,true=1} bool; /* local fallback bool type with false=0 and true=1 */
-  #endif
- #endif
-#endif
-
-/** \typedef cast5_ctx_t
- * \brief CAST-5 context
- * 
- * A variable of this type may hold a keyschedule for the CAST-5 cipher. 
- * This context is regularly generated by the 
- * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
- */
-typedef struct cast5_ctx_st{
-	uint32_t	mask[16];	/* masking keys, one 32-bit word per round */
-	uint8_t		rotl[8];	/* low 4 bits of every rotation key, two keys per byte */
-	uint8_t		roth[2];	/* bit 4 of every rotation key, eight keys per byte */
-	bool		shortkey;	/* set by cast5_init when the key is at most 80 bits long; selects 12 instead of 16 rounds */
-} cast5_ctx_t;
-
-
-/** \fn void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s);
- * \brief generate keyschedule/context for CAST-5
- * 
- * This function generates the keyschedule from the supplied key for the 
- * CAST-5 cipher and stores it in a supplied ::cast5_ctx_t context.
- * \param key pointer to the key
- * \param keylength_b length of the key in bits (values above 128 are treated as 128)
- * \param s pointer to the context
- */
-void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s);
-
-/** \fn void cast5_enc(void* block, const cast5_ctx_t* s);
- * \brief encrypt a block with the CAST-5 algorithm
- * 
- * This function encrypts a block of 64 bits (8 bytes) in place with the CAST-5 algorithm.
- * It uses a keyschedule as generated by the 
- * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
- * \param block pointer to the block which gets encrypted
- * \param s pointer to the keyschedule/context
- */
-void cast5_enc(void* block, const cast5_ctx_t* s);
-
-/** \fn void cast5_dec(void* block, const cast5_ctx_t* s);
- * \brief decrypt a block with the CAST-5 algorithm
- * 
- * This function decrypts a block of 64 bits (8 bytes) in place with the CAST-5 algorithm.
- * It uses a keyschedule as generated by the 
- * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
- * \param block pointer to the block which gets decrypted
- * \param s pointer to the keyschedule/context
- */
-void cast5_dec(void* block, const cast5_ctx_t* s);
-
-
-
-#endif
-
diff --git a/cast5/cast5-sbox.h b/cast5/cast5-sbox.h
new file mode 100644
index 0000000..4a8b6bf
--- /dev/null
+++ b/cast5/cast5-sbox.h
@@ -0,0 +1,601 @@
+/* cast5-sbox.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * File:	cast5-sbox.h
+ * Author:	Daniel Otte
+ * Date: 	26.07.2006
+ * License: GPLv3 or later
+ * Description: S-boxes for the CAST5 (aka CAST-128) cipher algorithm, as described in RFC 2144.
+ * 
+ */
+ 
+#ifndef CAST5_SBOX_H_
+#define CAST5_SBOX_H_
+
+#include <avr/pgmspace.h>
+#include <stdint.h>
+
+#ifndef BIG_ENDIAN
+
+uint32_t s1[] PROGMEM = {
+0x30fb40d4UL, 0x9fa0ff0bUL, 0x6beccd2fUL, 0x3f258c7aUL, 0x1e213f2fUL, 0x9c004dd3UL, 0x6003e540UL, 0xcf9fc949UL,
+0xbfd4af27UL, 0x88bbbdb5UL, 0xe2034090UL, 0x98d09675UL, 0x6e63a0e0UL, 0x15c361d2UL, 0xc2e7661dUL, 0x22d4ff8eUL,
+0x28683b6fUL, 0xc07fd059UL, 0xff2379c8UL, 0x775f50e2UL, 0x43c340d3UL, 0xdf2f8656UL, 0x887ca41aUL, 0xa2d2bd2dUL,
+0xa1c9e0d6UL, 0x346c4819UL, 0x61b76d87UL, 0x22540f2fUL, 0x2abe32e1UL, 0xaa54166bUL, 0x22568e3aUL, 0xa2d341d0UL,
+0x66db40c8UL, 0xa784392fUL, 0x004dff2fUL, 0x2db9d2deUL, 0x97943facUL, 0x4a97c1d8UL, 0x527644b7UL, 0xb5f437a7UL,
+0xb82cbaefUL, 0xd751d159UL, 0x6ff7f0edUL, 0x5a097a1fUL, 0x827b68d0UL, 0x90ecf52eUL, 0x22b0c054UL, 0xbc8e5935UL,
+0x4b6d2f7fUL, 0x50bb64a2UL, 0xd2664910UL, 0xbee5812dUL, 0xb7332290UL, 0xe93b159fUL, 0xb48ee411UL, 0x4bff345dUL,
+0xfd45c240UL, 0xad31973fUL, 0xc4f6d02eUL, 0x55fc8165UL, 0xd5b1caadUL, 0xa1ac2daeUL, 0xa2d4b76dUL, 0xc19b0c50UL,
+0x882240f2UL, 0x0c6e4f38UL, 0xa4e4bfd7UL, 0x4f5ba272UL, 0x564c1d2fUL, 0xc59c5319UL, 0xb949e354UL, 0xb04669feUL,
+0xb1b6ab8aUL, 0xc71358ddUL, 0x6385c545UL, 0x110f935dUL, 0x57538ad5UL, 0x6a390493UL, 0xe63d37e0UL, 0x2a54f6b3UL,
+0x3a787d5fUL, 0x6276a0b5UL, 0x19a6fcdfUL, 0x7a42206aUL, 0x29f9d4d5UL, 0xf61b1891UL, 0xbb72275eUL, 0xaa508167UL,
+0x38901091UL, 0xc6b505ebUL, 0x84c7cb8cUL, 0x2ad75a0fUL, 0x874a1427UL, 0xa2d1936bUL, 0x2ad286afUL, 0xaa56d291UL,
+0xd7894360UL, 0x425c750dUL, 0x93b39e26UL, 0x187184c9UL, 0x6c00b32dUL, 0x73e2bb14UL, 0xa0bebc3cUL, 0x54623779UL,
+0x64459eabUL, 0x3f328b82UL, 0x7718cf82UL, 0x59a2cea6UL, 0x04ee002eUL, 0x89fe78e6UL, 0x3fab0950UL, 0x325ff6c2UL,
+0x81383f05UL, 0x6963c5c8UL, 0x76cb5ad6UL, 0xd49974c9UL, 0xca180dcfUL, 0x380782d5UL, 0xc7fa5cf6UL, 0x8ac31511UL,
+0x35e79e13UL, 0x47da91d0UL, 0xf40f9086UL, 0xa7e2419eUL, 0x31366241UL, 0x051ef495UL, 0xaa573b04UL, 0x4a805d8dUL,
+0x548300d0UL, 0x00322a3cUL, 0xbf64cddfUL, 0xba57a68eUL, 0x75c6372bUL, 0x50afd341UL, 0xa7c13275UL, 0x915a0bf5UL,
+0x6b54bfabUL, 0x2b0b1426UL, 0xab4cc9d7UL, 0x449ccd82UL, 0xf7fbf265UL, 0xab85c5f3UL, 0x1b55db94UL, 0xaad4e324UL,
+0xcfa4bd3fUL, 0x2deaa3e2UL, 0x9e204d02UL, 0xc8bd25acUL, 0xeadf55b3UL, 0xd5bd9e98UL, 0xe31231b2UL, 0x2ad5ad6cUL,
+0x954329deUL, 0xadbe4528UL, 0xd8710f69UL, 0xaa51c90fUL, 0xaa786bf6UL, 0x22513f1eUL, 0xaa51a79bUL, 0x2ad344ccUL,
+0x7b5a41f0UL, 0xd37cfbadUL, 0x1b069505UL, 0x41ece491UL, 0xb4c332e6UL, 0x032268d4UL, 0xc9600accUL, 0xce387e6dUL,
+0xbf6bb16cUL, 0x6a70fb78UL, 0x0d03d9c9UL, 0xd4df39deUL, 0xe01063daUL, 0x4736f464UL, 0x5ad328d8UL, 0xb347cc96UL,
+0x75bb0fc3UL, 0x98511bfbUL, 0x4ffbcc35UL, 0xb58bcf6aUL, 0xe11f0abcUL, 0xbfc5fe4aUL, 0xa70aec10UL, 0xac39570aUL,
+0x3f04442fUL, 0x6188b153UL, 0xe0397a2eUL, 0x5727cb79UL, 0x9ceb418fUL, 0x1cacd68dUL, 0x2ad37c96UL, 0x0175cb9dUL,
+0xc69dff09UL, 0xc75b65f0UL, 0xd9db40d8UL, 0xec0e7779UL, 0x4744ead4UL, 0xb11c3274UL, 0xdd24cb9eUL, 0x7e1c54bdUL,
+0xf01144f9UL, 0xd2240eb1UL, 0x9675b3fdUL, 0xa3ac3755UL, 0xd47c27afUL, 0x51c85f4dUL, 0x56907596UL, 0xa5bb15e6UL,
+0x580304f0UL, 0xca042cf1UL, 0x011a37eaUL, 0x8dbfaadbUL, 0x35ba3e4aUL, 0x3526ffa0UL, 0xc37b4d09UL, 0xbc306ed9UL,
+0x98a52666UL, 0x5648f725UL, 0xff5e569dUL, 0x0ced63d0UL, 0x7c63b2cfUL, 0x700b45e1UL, 0xd5ea50f1UL, 0x85a92872UL,
+0xaf1fbda7UL, 0xd4234870UL, 0xa7870bf3UL, 0x2d3b4d79UL, 0x42e04198UL, 0x0cd0ede7UL, 0x26470db8UL, 0xf881814cUL,
+0x474d6ad7UL, 0x7c0c5e5cUL, 0xd1231959UL, 0x381b7298UL, 0xf5d2f4dbUL, 0xab838653UL, 0x6e2f1e23UL, 0x83719c9eUL,
+0xbd91e046UL, 0x9a56456eUL, 0xdc39200cUL, 0x20c8c571UL, 0x962bda1cUL, 0xe1e696ffUL, 0xb141ab08UL, 0x7cca89b9UL,
+0x1a69e783UL, 0x02cc4843UL, 0xa2f7c579UL, 0x429ef47dUL, 0x427b169cUL, 0x5ac9f049UL, 0xdd8f0f00UL, 0x5c8165bfUL};
+
+uint32_t s2[] PROGMEM = {
+0x1f201094UL, 0xef0ba75bUL, 0x69e3cf7eUL, 0x393f4380UL, 0xfe61cf7aUL, 0xeec5207aUL, 0x55889c94UL, 0x72fc0651UL,
+0xada7ef79UL, 0x4e1d7235UL, 0xd55a63ceUL, 0xde0436baUL, 0x99c430efUL, 0x5f0c0794UL, 0x18dcdb7dUL, 0xa1d6eff3UL,
+0xa0b52f7bUL, 0x59e83605UL, 0xee15b094UL, 0xe9ffd909UL, 0xdc440086UL, 0xef944459UL, 0xba83ccb3UL, 0xe0c3cdfbUL,
+0xd1da4181UL, 0x3b092ab1UL, 0xf997f1c1UL, 0xa5e6cf7bUL, 0x01420ddbUL, 0xe4e7ef5bUL, 0x25a1ff41UL, 0xe180f806UL,
+0x1fc41080UL, 0x179bee7aUL, 0xd37ac6a9UL, 0xfe5830a4UL, 0x98de8b7fUL, 0x77e83f4eUL, 0x79929269UL, 0x24fa9f7bUL,
+0xe113c85bUL, 0xacc40083UL, 0xd7503525UL, 0xf7ea615fUL, 0x62143154UL, 0x0d554b63UL, 0x5d681121UL, 0xc866c359UL,
+0x3d63cf73UL, 0xcee234c0UL, 0xd4d87e87UL, 0x5c672b21UL, 0x071f6181UL, 0x39f7627fUL, 0x361e3084UL, 0xe4eb573bUL,
+0x602f64a4UL, 0xd63acd9cUL, 0x1bbc4635UL, 0x9e81032dUL, 0x2701f50cUL, 0x99847ab4UL, 0xa0e3df79UL, 0xba6cf38cUL,
+0x10843094UL, 0x2537a95eUL, 0xf46f6ffeUL, 0xa1ff3b1fUL, 0x208cfb6aUL, 0x8f458c74UL, 0xd9e0a227UL, 0x4ec73a34UL,
+0xfc884f69UL, 0x3e4de8dfUL, 0xef0e0088UL, 0x3559648dUL, 0x8a45388cUL, 0x1d804366UL, 0x721d9bfdUL, 0xa58684bbUL,
+0xe8256333UL, 0x844e8212UL, 0x128d8098UL, 0xfed33fb4UL, 0xce280ae1UL, 0x27e19ba5UL, 0xd5a6c252UL, 0xe49754bdUL,
+0xc5d655ddUL, 0xeb667064UL, 0x77840b4dUL, 0xa1b6a801UL, 0x84db26a9UL, 0xe0b56714UL, 0x21f043b7UL, 0xe5d05860UL,
+0x54f03084UL, 0x066ff472UL, 0xa31aa153UL, 0xdadc4755UL, 0xb5625dbfUL, 0x68561be6UL, 0x83ca6b94UL, 0x2d6ed23bUL,
+0xeccf01dbUL, 0xa6d3d0baUL, 0xb6803d5cUL, 0xaf77a709UL, 0x33b4a34cUL, 0x397bc8d6UL, 0x5ee22b95UL, 0x5f0e5304UL,
+0x81ed6f61UL, 0x20e74364UL, 0xb45e1378UL, 0xde18639bUL, 0x881ca122UL, 0xb96726d1UL, 0x8049a7e8UL, 0x22b7da7bUL,
+0x5e552d25UL, 0x5272d237UL, 0x79d2951cUL, 0xc60d894cUL, 0x488cb402UL, 0x1ba4fe5bUL, 0xa4b09f6bUL, 0x1ca815cfUL,
+0xa20c3005UL, 0x8871df63UL, 0xb9de2fcbUL, 0x0cc6c9e9UL, 0x0beeff53UL, 0xe3214517UL, 0xb4542835UL, 0x9f63293cUL,
+0xee41e729UL, 0x6e1d2d7cUL, 0x50045286UL, 0x1e6685f3UL, 0xf33401c6UL, 0x30a22c95UL, 0x31a70850UL, 0x60930f13UL,
+0x73f98417UL, 0xa1269859UL, 0xec645c44UL, 0x52c877a9UL, 0xcdff33a6UL, 0xa02b1741UL, 0x7cbad9a2UL, 0x2180036fUL,
+0x50d99c08UL, 0xcb3f4861UL, 0xc26bd765UL, 0x64a3f6abUL, 0x80342676UL, 0x25a75e7bUL, 0xe4e6d1fcUL, 0x20c710e6UL,
+0xcdf0b680UL, 0x17844d3bUL, 0x31eef84dUL, 0x7e0824e4UL, 0x2ccb49ebUL, 0x846a3baeUL, 0x8ff77888UL, 0xee5d60f6UL,
+0x7af75673UL, 0x2fdd5cdbUL, 0xa11631c1UL, 0x30f66f43UL, 0xb3faec54UL, 0x157fd7faUL, 0xef8579ccUL, 0xd152de58UL,
+0xdb2ffd5eUL, 0x8f32ce19UL, 0x306af97aUL, 0x02f03ef8UL, 0x99319ad5UL, 0xc242fa0fUL, 0xa7e3ebb0UL, 0xc68e4906UL,
+0xb8da230cUL, 0x80823028UL, 0xdcdef3c8UL, 0xd35fb171UL, 0x088a1bc8UL, 0xbec0c560UL, 0x61a3c9e8UL, 0xbca8f54dUL,
+0xc72feffaUL, 0x22822e99UL, 0x82c570b4UL, 0xd8d94e89UL, 0x8b1c34bcUL, 0x301e16e6UL, 0x273be979UL, 0xb0ffeaa6UL,
+0x61d9b8c6UL, 0x00b24869UL, 0xb7ffce3fUL, 0x08dc283bUL, 0x43daf65aUL, 0xf7e19798UL, 0x7619b72fUL, 0x8f1c9ba4UL,
+0xdc8637a0UL, 0x16a7d3b1UL, 0x9fc393b7UL, 0xa7136eebUL, 0xc6bcc63eUL, 0x1a513742UL, 0xef6828bcUL, 0x520365d6UL,
+0x2d6a77abUL, 0x3527ed4bUL, 0x821fd216UL, 0x095c6e2eUL, 0xdb92f2fbUL, 0x5eea29cbUL, 0x145892f5UL, 0x91584f7fUL,
+0x5483697bUL, 0x2667a8ccUL, 0x85196048UL, 0x8c4baceaUL, 0x833860d4UL, 0x0d23e0f9UL, 0x6c387e8aUL, 0x0ae6d249UL,
+0xb284600cUL, 0xd835731dUL, 0xdcb1c647UL, 0xac4c56eaUL, 0x3ebd81b3UL, 0x230eabb0UL, 0x6438bc87UL, 0xf0b5b1faUL,
+0x8f5ea2b3UL, 0xfc184642UL, 0x0a036b7aUL, 0x4fb089bdUL, 0x649da589UL, 0xa345415eUL, 0x5c038323UL, 0x3e5d3bb9UL,
+0x43d79572UL, 0x7e6dd07cUL, 0x06dfdf1eUL, 0x6c6cc4efUL, 0x7160a539UL, 0x73bfbe70UL, 0x83877605UL, 0x4523ecf1UL};
+
+uint32_t s3[] PROGMEM = {
+0x8defc240UL, 0x25fa5d9fUL, 0xeb903dbfUL, 0xe810c907UL, 0x47607fffUL, 0x369fe44bUL, 0x8c1fc644UL, 0xaececa90UL,
+0xbeb1f9bfUL, 0xeefbcaeaUL, 0xe8cf1950UL, 0x51df07aeUL, 0x920e8806UL, 0xf0ad0548UL, 0xe13c8d83UL, 0x927010d5UL,
+0x11107d9fUL, 0x07647db9UL, 0xb2e3e4d4UL, 0x3d4f285eUL, 0xb9afa820UL, 0xfade82e0UL, 0xa067268bUL, 0x8272792eUL,
+0x553fb2c0UL, 0x489ae22bUL, 0xd4ef9794UL, 0x125e3fbcUL, 0x21fffceeUL, 0x825b1bfdUL, 0x9255c5edUL, 0x1257a240UL,
+0x4e1a8302UL, 0xbae07fffUL, 0x528246e7UL, 0x8e57140eUL, 0x3373f7bfUL, 0x8c9f8188UL, 0xa6fc4ee8UL, 0xc982b5a5UL,
+0xa8c01db7UL, 0x579fc264UL, 0x67094f31UL, 0xf2bd3f5fUL, 0x40fff7c1UL, 0x1fb78dfcUL, 0x8e6bd2c1UL, 0x437be59bUL,
+0x99b03dbfUL, 0xb5dbc64bUL, 0x638dc0e6UL, 0x55819d99UL, 0xa197c81cUL, 0x4a012d6eUL, 0xc5884a28UL, 0xccc36f71UL,
+0xb843c213UL, 0x6c0743f1UL, 0x8309893cUL, 0x0feddd5fUL, 0x2f7fe850UL, 0xd7c07f7eUL, 0x02507fbfUL, 0x5afb9a04UL,
+0xa747d2d0UL, 0x1651192eUL, 0xaf70bf3eUL, 0x58c31380UL, 0x5f98302eUL, 0x727cc3c4UL, 0x0a0fb402UL, 0x0f7fef82UL,
+0x8c96fdadUL, 0x5d2c2aaeUL, 0x8ee99a49UL, 0x50da88b8UL, 0x8427f4a0UL, 0x1eac5790UL, 0x796fb449UL, 0x8252dc15UL,
+0xefbd7d9bUL, 0xa672597dUL, 0xada840d8UL, 0x45f54504UL, 0xfa5d7403UL, 0xe83ec305UL, 0x4f91751aUL, 0x925669c2UL,
+0x23efe941UL, 0xa903f12eUL, 0x60270df2UL, 0x0276e4b6UL, 0x94fd6574UL, 0x927985b2UL, 0x8276dbcbUL, 0x02778176UL,
+0xf8af918dUL, 0x4e48f79eUL, 0x8f616ddfUL, 0xe29d840eUL, 0x842f7d83UL, 0x340ce5c8UL, 0x96bbb682UL, 0x93b4b148UL,
+0xef303cabUL, 0x984faf28UL, 0x779faf9bUL, 0x92dc560dUL, 0x224d1e20UL, 0x8437aa88UL, 0x7d29dc96UL, 0x2756d3dcUL,
+0x8b907ceeUL, 0xb51fd240UL, 0xe7c07ce3UL, 0xe566b4a1UL, 0xc3e9615eUL, 0x3cf8209dUL, 0x6094d1e3UL, 0xcd9ca341UL,
+0x5c76460eUL, 0x00ea983bUL, 0xd4d67881UL, 0xfd47572cUL, 0xf76cedd9UL, 0xbda8229cUL, 0x127dadaaUL, 0x438a074eUL,
+0x1f97c090UL, 0x081bdb8aUL, 0x93a07ebeUL, 0xb938ca15UL, 0x97b03cffUL, 0x3dc2c0f8UL, 0x8d1ab2ecUL, 0x64380e51UL,
+0x68cc7bfbUL, 0xd90f2788UL, 0x12490181UL, 0x5de5ffd4UL, 0xdd7ef86aUL, 0x76a2e214UL, 0xb9a40368UL, 0x925d958fUL,
+0x4b39fffaUL, 0xba39aee9UL, 0xa4ffd30bUL, 0xfaf7933bUL, 0x6d498623UL, 0x193cbcfaUL, 0x27627545UL, 0x825cf47aUL,
+0x61bd8ba0UL, 0xd11e42d1UL, 0xcead04f4UL, 0x127ea392UL, 0x10428db7UL, 0x8272a972UL, 0x9270c4a8UL, 0x127de50bUL,
+0x285ba1c8UL, 0x3c62f44fUL, 0x35c0eaa5UL, 0xe805d231UL, 0x428929fbUL, 0xb4fcdf82UL, 0x4fb66a53UL, 0x0e7dc15bUL,
+0x1f081fabUL, 0x108618aeUL, 0xfcfd086dUL, 0xf9ff2889UL, 0x694bcc11UL, 0x236a5caeUL, 0x12deca4dUL, 0x2c3f8cc5UL,
+0xd2d02dfeUL, 0xf8ef5896UL, 0xe4cf52daUL, 0x95155b67UL, 0x494a488cUL, 0xb9b6a80cUL, 0x5c8f82bcUL, 0x89d36b45UL,
+0x3a609437UL, 0xec00c9a9UL, 0x44715253UL, 0x0a874b49UL, 0xd773bc40UL, 0x7c34671cUL, 0x02717ef6UL, 0x4feb5536UL,
+0xa2d02fffUL, 0xd2bf60c4UL, 0xd43f03c0UL, 0x50b4ef6dUL, 0x07478cd1UL, 0x006e1888UL, 0xa2e53f55UL, 0xb9e6d4bcUL,
+0xa2048016UL, 0x97573833UL, 0xd7207d67UL, 0xde0f8f3dUL, 0x72f87b33UL, 0xabcc4f33UL, 0x7688c55dUL, 0x7b00a6b0UL,
+0x947b0001UL, 0x570075d2UL, 0xf9bb88f8UL, 0x8942019eUL, 0x4264a5ffUL, 0x856302e0UL, 0x72dbd92bUL, 0xee971b69UL,
+0x6ea22fdeUL, 0x5f08ae2bUL, 0xaf7a616dUL, 0xe5c98767UL, 0xcf1febd2UL, 0x61efc8c2UL, 0xf1ac2571UL, 0xcc8239c2UL,
+0x67214cb8UL, 0xb1e583d1UL, 0xb7dc3e62UL, 0x7f10bdceUL, 0xf90a5c38UL, 0x0ff0443dUL, 0x606e6dc6UL, 0x60543a49UL,
+0x5727c148UL, 0x2be98a1dUL, 0x8ab41738UL, 0x20e1be24UL, 0xaf96da0fUL, 0x68458425UL, 0x99833be5UL, 0x600d457dUL,
+0x282f9350UL, 0x8334b362UL, 0xd91d1120UL, 0x2b6d8da0UL, 0x642b1e31UL, 0x9c305a00UL, 0x52bce688UL, 0x1b03588aUL,
+0xf7baefd5UL, 0x4142ed9cUL, 0xa4315c11UL, 0x83323ec5UL, 0xdfef4636UL, 0xa133c501UL, 0xe9d3531cUL, 0xee353783UL};
+
+uint32_t s4[] PROGMEM = {
+0x9db30420UL, 0x1fb6e9deUL, 0xa7be7befUL, 0xd273a298UL, 0x4a4f7bdbUL, 0x64ad8c57UL, 0x85510443UL, 0xfa020ed1UL,
+0x7e287affUL, 0xe60fb663UL, 0x095f35a1UL, 0x79ebf120UL, 0xfd059d43UL, 0x6497b7b1UL, 0xf3641f63UL, 0x241e4adfUL,
+0x28147f5fUL, 0x4fa2b8cdUL, 0xc9430040UL, 0x0cc32220UL, 0xfdd30b30UL, 0xc0a5374fUL, 0x1d2d00d9UL, 0x24147b15UL,
+0xee4d111aUL, 0x0fca5167UL, 0x71ff904cUL, 0x2d195ffeUL, 0x1a05645fUL, 0x0c13fefeUL, 0x081b08caUL, 0x05170121UL,
+0x80530100UL, 0xe83e5efeUL, 0xac9af4f8UL, 0x7fe72701UL, 0xd2b8ee5fUL, 0x06df4261UL, 0xbb9e9b8aUL, 0x7293ea25UL,
+0xce84ffdfUL, 0xf5718801UL, 0x3dd64b04UL, 0xa26f263bUL, 0x7ed48400UL, 0x547eebe6UL, 0x446d4ca0UL, 0x6cf3d6f5UL,
+0x2649abdfUL, 0xaea0c7f5UL, 0x36338cc1UL, 0x503f7e93UL, 0xd3772061UL, 0x11b638e1UL, 0x72500e03UL, 0xf80eb2bbUL,
+0xabe0502eUL, 0xec8d77deUL, 0x57971e81UL, 0xe14f6746UL, 0xc9335400UL, 0x6920318fUL, 0x081dbb99UL, 0xffc304a5UL,
+0x4d351805UL, 0x7f3d5ce3UL, 0xa6c866c6UL, 0x5d5bcca9UL, 0xdaec6feaUL, 0x9f926f91UL, 0x9f46222fUL, 0x3991467dUL,
+0xa5bf6d8eUL, 0x1143c44fUL, 0x43958302UL, 0xd0214eebUL, 0x022083b8UL, 0x3fb6180cUL, 0x18f8931eUL, 0x281658e6UL,
+0x26486e3eUL, 0x8bd78a70UL, 0x7477e4c1UL, 0xb506e07cUL, 0xf32d0a25UL, 0x79098b02UL, 0xe4eabb81UL, 0x28123b23UL,
+0x69dead38UL, 0x1574ca16UL, 0xdf871b62UL, 0x211c40b7UL, 0xa51a9ef9UL, 0x0014377bUL, 0x041e8ac8UL, 0x09114003UL,
+0xbd59e4d2UL, 0xe3d156d5UL, 0x4fe876d5UL, 0x2f91a340UL, 0x557be8deUL, 0x00eae4a7UL, 0x0ce5c2ecUL, 0x4db4bba6UL,
+0xe756bdffUL, 0xdd3369acUL, 0xec17b035UL, 0x06572327UL, 0x99afc8b0UL, 0x56c8c391UL, 0x6b65811cUL, 0x5e146119UL,
+0x6e85cb75UL, 0xbe07c002UL, 0xc2325577UL, 0x893ff4ecUL, 0x5bbfc92dUL, 0xd0ec3b25UL, 0xb7801ab7UL, 0x8d6d3b24UL,
+0x20c763efUL, 0xc366a5fcUL, 0x9c382880UL, 0x0ace3205UL, 0xaac9548aUL, 0xeca1d7c7UL, 0x041afa32UL, 0x1d16625aUL,
+0x6701902cUL, 0x9b757a54UL, 0x31d477f7UL, 0x9126b031UL, 0x36cc6fdbUL, 0xc70b8b46UL, 0xd9e66a48UL, 0x56e55a79UL,
+0x026a4cebUL, 0x52437effUL, 0x2f8f76b4UL, 0x0df980a5UL, 0x8674cde3UL, 0xedda04ebUL, 0x17a9be04UL, 0x2c18f4dfUL,
+0xb7747f9dUL, 0xab2af7b4UL, 0xefc34d20UL, 0x2e096b7cUL, 0x1741a254UL, 0xe5b6a035UL, 0x213d42f6UL, 0x2c1c7c26UL,
+0x61c2f50fUL, 0x6552daf9UL, 0xd2c231f8UL, 0x25130f69UL, 0xd8167fa2UL, 0x0418f2c8UL, 0x001a96a6UL, 0x0d1526abUL,
+0x63315c21UL, 0x5e0a72ecUL, 0x49bafefdUL, 0x187908d9UL, 0x8d0dbd86UL, 0x311170a7UL, 0x3e9b640cUL, 0xcc3e10d7UL,
+0xd5cad3b6UL, 0x0caec388UL, 0xf73001e1UL, 0x6c728affUL, 0x71eae2a1UL, 0x1f9af36eUL, 0xcfcbd12fUL, 0xc1de8417UL,
+0xac07be6bUL, 0xcb44a1d8UL, 0x8b9b0f56UL, 0x013988c3UL, 0xb1c52fcaUL, 0xb4be31cdUL, 0xd8782806UL, 0x12a3a4e2UL,
+0x6f7de532UL, 0x58fd7eb6UL, 0xd01ee900UL, 0x24adffc2UL, 0xf4990fc5UL, 0x9711aac5UL, 0x001d7b95UL, 0x82e5e7d2UL,
+0x109873f6UL, 0x00613096UL, 0xc32d9521UL, 0xada121ffUL, 0x29908415UL, 0x7fbb977fUL, 0xaf9eb3dbUL, 0x29c9ed2aUL,
+0x5ce2a465UL, 0xa730f32cUL, 0xd0aa3fe8UL, 0x8a5cc091UL, 0xd49e2ce7UL, 0x0ce454a9UL, 0xd60acd86UL, 0x015f1919UL,
+0x77079103UL, 0xdea03af6UL, 0x78a8565eUL, 0xdee356dfUL, 0x21f05cbeUL, 0x8b75e387UL, 0xb3c50651UL, 0xb8a5c3efUL,
+0xd8eeb6d2UL, 0xe523be77UL, 0xc2154529UL, 0x2f69efdfUL, 0xafe67afbUL, 0xf470c4b2UL, 0xf3e0eb5bUL, 0xd6cc9876UL,
+0x39e4460cUL, 0x1fda8538UL, 0x1987832fUL, 0xca007367UL, 0xa99144f8UL, 0x296b299eUL, 0x492fc295UL, 0x9266beabUL,
+0xb5676e69UL, 0x9bd3dddaUL, 0xdf7e052fUL, 0xdb25701cUL, 0x1b5e51eeUL, 0xf65324e6UL, 0x6afce36cUL, 0x0316cc04UL,
+0x8644213eUL, 0xb7dc59d0UL, 0x7965291fUL, 0xccd6fd43UL, 0x41823979UL, 0x932bcdf6UL, 0xb657c34dUL, 0x4edfd282UL,
+0x7ae5290cUL, 0x3cb9536bUL, 0x851e20feUL, 0x9833557eUL, 0x13ecf0b0UL, 0xd3ffb372UL, 0x3f85c5c1UL, 0x0aef7ed2UL};
+
+#else
+
+uint32_t s1[] PROGMEM = {
+0xd440fb30UL, 0x0bffa09fUL, 0x2fcdec6bUL, 0x7a8c253fUL, 0x2f3f211eUL, 0xd34d009cUL, 0x40e50360UL, 0x49c99fcfUL,
+0x27afd4bfUL, 0xb5bdbb88UL, 0x904003e2UL, 0x7596d098UL, 0xe0a0636eUL, 0xd261c315UL, 0x1d66e7c2UL, 0x8effd422UL,
+0x6f3b6828UL, 0x59d07fc0UL, 0xc87923ffUL, 0xe2505f77UL, 0xd340c343UL, 0x56862fdfUL, 0x1aa47c88UL, 0x2dbdd2a2UL,
+0xd6e0c9a1UL, 0x19486c34UL, 0x876db761UL, 0x2f0f5422UL, 0xe132be2aUL, 0x6b1654aaUL, 0x3a8e5622UL, 0xd041d3a2UL,
+0xc840db66UL, 0x2f3984a7UL, 0x2fff4d00UL, 0xded2b92dUL, 0xac3f9497UL, 0xd8c1974aUL, 0xb7447652UL, 0xa737f4b5UL,
+0xefba2cb8UL, 0x59d151d7UL, 0xedf0f76fUL, 0x1f7a095aUL, 0xd0687b82UL, 0x2ef5ec90UL, 0x54c0b022UL, 0x35598ebcUL,
+0x7f2f6d4bUL, 0xa264bb50UL, 0x104966d2UL, 0x2d81e5beUL, 0x902233b7UL, 0x9f153be9UL, 0x11e48eb4UL, 0x5d34ff4bUL,
+0x40c245fdUL, 0x3f9731adUL, 0x2ed0f6c4UL, 0x6581fc55UL, 0xadcab1d5UL, 0xae2daca1UL, 0x6db7d4a2UL, 0x500c9bc1UL,
+0xf2402288UL, 0x384f6e0cUL, 0xd7bfe4a4UL, 0x72a25b4fUL, 0x2f1d4c56UL, 0x19539cc5UL, 0x54e349b9UL, 0xfe6946b0UL,
+0x8aabb6b1UL, 0xdd5813c7UL, 0x45c58563UL, 0x5d930f11UL, 0xd58a5357UL, 0x9304396aUL, 0xe0373de6UL, 0xb3f6542aUL,
+0x5f7d783aUL, 0xb5a07662UL, 0xdffca619UL, 0x6a20427aUL, 0xd5d4f929UL, 0x91181bf6UL, 0x5e2772bbUL, 0x678150aaUL,
+0x91109038UL, 0xeb05b5c6UL, 0x8ccbc784UL, 0x0f5ad72aUL, 0x27144a87UL, 0x6b93d1a2UL, 0xaf86d22aUL, 0x91d256aaUL,
+0x604389d7UL, 0x0d755c42UL, 0x269eb393UL, 0xc9847118UL, 0x2db3006cUL, 0x14bbe273UL, 0x3cbcbea0UL, 0x79376254UL,
+0xab9e4564UL, 0x828b323fUL, 0x82cf1877UL, 0xa6cea259UL, 0x2e00ee04UL, 0xe678fe89UL, 0x5009ab3fUL, 0xc2f65f32UL,
+0x053f3881UL, 0xc8c56369UL, 0xd65acb76UL, 0xc97499d4UL, 0xcf0d18caUL, 0xd5820738UL, 0xf65cfac7UL, 0x1115c38aUL,
+0x139ee735UL, 0xd091da47UL, 0x86900ff4UL, 0x9e41e2a7UL, 0x41623631UL, 0x95f41e05UL, 0x043b57aaUL, 0x8d5d804aUL,
+0xd0008354UL, 0x3c2a3200UL, 0xdfcd64bfUL, 0x8ea657baUL, 0x2b37c675UL, 0x41d3af50UL, 0x7532c1a7UL, 0xf50b5a91UL,
+0xabbf546bUL, 0x26140b2bUL, 0xd7c94cabUL, 0x82cd9c44UL, 0x65f2fbf7UL, 0xf3c585abUL, 0x94db551bUL, 0x24e3d4aaUL,
+0x3fbda4cfUL, 0xe2a3ea2dUL, 0x024d209eUL, 0xac25bdc8UL, 0xb355dfeaUL, 0x989ebdd5UL, 0xb23112e3UL, 0x6cadd52aUL,
+0xde294395UL, 0x2845beadUL, 0x690f71d8UL, 0x0fc951aaUL, 0xf66b78aaUL, 0x1e3f5122UL, 0x9ba751aaUL, 0xcc44d32aUL,
+0xf0415a7bUL, 0xadfb7cd3UL, 0x0595061bUL, 0x91e4ec41UL, 0xe632c3b4UL, 0xd4682203UL, 0xcc0a60c9UL, 0x6d7e38ceUL,
+0x6cb16bbfUL, 0x78fb706aUL, 0xc9d9030dUL, 0xde39dfd4UL, 0xda6310e0UL, 0x64f43647UL, 0xd828d35aUL, 0x96cc47b3UL,
+0xc30fbb75UL, 0xfb1b5198UL, 0x35ccfb4fUL, 0x6acf8bb5UL, 0xbc0a1fe1UL, 0x4afec5bfUL, 0x10ec0aa7UL, 0x0a5739acUL,
+0x2f44043fUL, 0x53b18861UL, 0x2e7a39e0UL, 0x79cb2757UL, 0x8f41eb9cUL, 0x8dd6ac1cUL, 0x967cd32aUL, 0x9dcb7501UL,
+0x09ff9dc6UL, 0xf0655bc7UL, 0xd840dbd9UL, 0x79770eecUL, 0xd4ea4447UL, 0x74321cb1UL, 0x9ecb24ddUL, 0xbd541c7eUL,
+0xf94411f0UL, 0xb10e24d2UL, 0xfdb37596UL, 0x5537aca3UL, 0xaf277cd4UL, 0x4d5fc851UL, 0x96759056UL, 0xe615bba5UL,
+0xf0040358UL, 0xf12c04caUL, 0xea371a01UL, 0xdbaabf8dUL, 0x4a3eba35UL, 0xa0ff2635UL, 0x094d7bc3UL, 0xd96e30bcUL,
+0x6626a598UL, 0x25f74856UL, 0x9d565effUL, 0xd063ed0cUL, 0xcfb2637cUL, 0xe1450b70UL, 0xf150ead5UL, 0x7228a985UL,
+0xa7bd1fafUL, 0x704823d4UL, 0xf30b87a7UL, 0x794d3b2dUL, 0x9841e042UL, 0xe7edd00cUL, 0xb80d4726UL, 0x4c8181f8UL,
+0xd76a4d47UL, 0x5c5e0c7cUL, 0x591923d1UL, 0x98721b38UL, 0xdbf4d2f5UL, 0x538683abUL, 0x231e2f6eUL, 0x9e9c7183UL,
+0x46e091bdUL, 0x6e45569aUL, 0x0c2039dcUL, 0x71c5c820UL, 0x1cda2b96UL, 0xff96e6e1UL, 0x08ab41b1UL, 0xb989ca7cUL,
+0x83e7691aUL, 0x4348cc02UL, 0x79c5f7a2UL, 0x7df49e42UL, 0x9c167b42UL, 0x49f0c95aUL, 0x000f8fddUL, 0xbf65815cUL};
+
+uint32_t s2[] PROGMEM = {
+0x9410201fUL, 0x5ba70befUL, 0x7ecfe369UL, 0x80433f39UL, 0x7acf61feUL, 0x7a20c5eeUL, 0x949c8855UL, 0x5106fc72UL,
+0x79efa7adUL, 0x35721d4eUL, 0xce635ad5UL, 0xba3604deUL, 0xef30c499UL, 0x94070c5fUL, 0x7ddbdc18UL, 0xf3efd6a1UL,
+0x7b2fb5a0UL, 0x0536e859UL, 0x94b015eeUL, 0x09d9ffe9UL, 0x860044dcUL, 0x594494efUL, 0xb3cc83baUL, 0xfbcdc3e0UL,
+0x8141dad1UL, 0xb12a093bUL, 0xc1f197f9UL, 0x7bcfe6a5UL, 0xdb0d4201UL, 0x5befe7e4UL, 0x41ffa125UL, 0x06f880e1UL,
+0x8010c41fUL, 0x7aee9b17UL, 0xa9c67ad3UL, 0xa43058feUL, 0x7f8bde98UL, 0x4e3fe877UL, 0x69929279UL, 0x7b9ffa24UL,
+0x5bc813e1UL, 0x8300c4acUL, 0x253550d7UL, 0x5f61eaf7UL, 0x54311462UL, 0x634b550dUL, 0x2111685dUL, 0x59c366c8UL,
+0x73cf633dUL, 0xc034e2ceUL, 0x877ed8d4UL, 0x212b675cUL, 0x81611f07UL, 0x7f62f739UL, 0x84301e36UL, 0x3b57ebe4UL,
+0xa4642f60UL, 0x9ccd3ad6UL, 0x3546bc1bUL, 0x2d03819eUL, 0x0cf50127UL, 0xb47a8499UL, 0x79dfe3a0UL, 0x8cf36cbaUL,
+0x94308410UL, 0x5ea93725UL, 0xfe6f6ff4UL, 0x1f3bffa1UL, 0x6afb8c20UL, 0x748c458fUL, 0x27a2e0d9UL, 0x343ac74eUL,
+0x694f88fcUL, 0xdfe84d3eUL, 0x88000eefUL, 0x8d645935UL, 0x8c38458aUL, 0x6643801dUL, 0xfd9b1d72UL, 0xbb8486a5UL,
+0x336325e8UL, 0x12824e84UL, 0x98808d12UL, 0xb43fd3feUL, 0xe10a28ceUL, 0xa59be127UL, 0x52c2a6d5UL, 0xbd5497e4UL,
+0xdd55d6c5UL, 0x647066ebUL, 0x4d0b8477UL, 0x01a8b6a1UL, 0xa926db84UL, 0x1467b5e0UL, 0xb743f021UL, 0x6058d0e5UL,
+0x8430f054UL, 0x72f46f06UL, 0x53a11aa3UL, 0x5547dcdaUL, 0xbf5d62b5UL, 0xe61b5668UL, 0x946bca83UL, 0x3bd26e2dUL,
+0xdb01cfecUL, 0xbad0d3a6UL, 0x5c3d80b6UL, 0x09a777afUL, 0x4ca3b433UL, 0xd6c87b39UL, 0x952be25eUL, 0x04530e5fUL,
+0x616fed81UL, 0x6443e720UL, 0x78135eb4UL, 0x9b6318deUL, 0x22a11c88UL, 0xd12667b9UL, 0xe8a74980UL, 0x7bdab722UL,
+0x252d555eUL, 0x37d27252UL, 0x1c95d279UL, 0x4c890dc6UL, 0x02b48c48UL, 0x5bfea41bUL, 0x6b9fb0a4UL, 0xcf15a81cUL,
+0x05300ca2UL, 0x63df7188UL, 0xcb2fdeb9UL, 0xe9c9c60cUL, 0x53ffee0bUL, 0x174521e3UL, 0x352854b4UL, 0x3c29639fUL,
+0x29e741eeUL, 0x7c2d1d6eUL, 0x86520450UL, 0xf385661eUL, 0xc60134f3UL, 0x952ca230UL, 0x5008a731UL, 0x130f9360UL,
+0x1784f973UL, 0x599826a1UL, 0x445c64ecUL, 0xa977c852UL, 0xa633ffcdUL, 0x41172ba0UL, 0xa2d9ba7cUL, 0x6f038021UL,
+0x089cd950UL, 0x61483fcbUL, 0x65d76bc2UL, 0xabf6a364UL, 0x76263480UL, 0x7b5ea725UL, 0xfcd1e6e4UL, 0xe610c720UL,
+0x80b6f0cdUL, 0x3b4d8417UL, 0x4df8ee31UL, 0xe424087eUL, 0xeb49cb2cUL, 0xae3b6a84UL, 0x8878f78fUL, 0xf6605deeUL,
+0x7356f77aUL, 0xdb5cdd2fUL, 0xc13116a1UL, 0x436ff630UL, 0x54ecfab3UL, 0xfad77f15UL, 0xcc7985efUL, 0x58de52d1UL,
+0x5efd2fdbUL, 0x19ce328fUL, 0x7af96a30UL, 0xf83ef002UL, 0xd59a3199UL, 0x0ffa42c2UL, 0xb0ebe3a7UL, 0x06498ec6UL,
+0x0c23dab8UL, 0x28308280UL, 0xc8f3dedcUL, 0x71b15fd3UL, 0xc81b8a08UL, 0x60c5c0beUL, 0xe8c9a361UL, 0x4df5a8bcUL,
+0xfaef2fc7UL, 0x992e8222UL, 0xb470c582UL, 0x894ed9d8UL, 0xbc341c8bUL, 0xe6161e30UL, 0x79e93b27UL, 0xa6eaffb0UL,
+0xc6b8d961UL, 0x6948b200UL, 0x3fceffb7UL, 0x3b28dc08UL, 0x5af6da43UL, 0x9897e1f7UL, 0x2fb71976UL, 0xa49b1c8fUL,
+0xa03786dcUL, 0xb1d3a716UL, 0xb793c39fUL, 0xeb6e13a7UL, 0x3ec6bcc6UL, 0x4237511aUL, 0xbc2868efUL, 0xd6650352UL,
+0xab776a2dUL, 0x4bed2735UL, 0x16d21f82UL, 0x2e6e5c09UL, 0xfbf292dbUL, 0xcb29ea5eUL, 0xf5925814UL, 0x7f4f5891UL,
+0x7b698354UL, 0xcca86726UL, 0x48601985UL, 0xeaac4b8cUL, 0xd4603883UL, 0xf9e0230dUL, 0x8a7e386cUL, 0x49d2e60aUL,
+0x0c6084b2UL, 0x1d7335d8UL, 0x47c6b1dcUL, 0xea564cacUL, 0xb381bd3eUL, 0xb0ab0e23UL, 0x87bc3864UL, 0xfab1b5f0UL,
+0xb3a25e8fUL, 0x424618fcUL, 0x7a6b030aUL, 0xbd89b04fUL, 0x89a59d64UL, 0x5e4145a3UL, 0x2383035cUL, 0xb93b5d3eUL,
+0x7295d743UL, 0x7cd06d7eUL, 0x1edfdf06UL, 0xefc46c6cUL, 0x39a56071UL, 0x70bebf73UL, 0x05768783UL, 0xf1ec2345UL};
+
+uint32_t s3[] PROGMEM = {
+0x40c2ef8dUL, 0x9f5dfa25UL, 0xbf3d90ebUL, 0x07c910e8UL, 0xff7f6047UL, 0x4be49f36UL, 0x44c61f8cUL, 0x90caceaeUL,
+0xbff9b1beUL, 0xeacafbeeUL, 0x5019cfe8UL, 0xae07df51UL, 0x06880e92UL, 0x4805adf0UL, 0x838d3ce1UL, 0xd5107092UL,
+0x9f7d1011UL, 0xb97d6407UL, 0xd4e4e3b2UL, 0x5e284f3dUL, 0x20a8afb9UL, 0xe082defaUL, 0x8b2667a0UL, 0x2e797282UL,
+0xc0b23f55UL, 0x2be29a48UL, 0x9497efd4UL, 0xbc3f5e12UL, 0xeefcff21UL, 0xfd1b5b82UL, 0xedc55592UL, 0x40a25712UL,
+0x02831a4eUL, 0xff7fe0baUL, 0xe7468252UL, 0x0e14578eUL, 0xbff77333UL, 0x88819f8cUL, 0xe84efca6UL, 0xa5b582c9UL,
+0xb71dc0a8UL, 0x64c29f57UL, 0x314f0967UL, 0x5f3fbdf2UL, 0xc1f7ff40UL, 0xfc8db71fUL, 0xc1d26b8eUL, 0x9be57b43UL,
+0xbf3db099UL, 0x4bc6dbb5UL, 0xe6c08d63UL, 0x999d8155UL, 0x1cc897a1UL, 0x6e2d014aUL, 0x284a88c5UL, 0x716fc3ccUL,
+0x13c243b8UL, 0xf143076cUL, 0x3c890983UL, 0x5fdded0fUL, 0x50e87f2fUL, 0x7e7fc0d7UL, 0xbf7f5002UL, 0x049afb5aUL,
+0xd0d247a7UL, 0x2e195116UL, 0x3ebf70afUL, 0x8013c358UL, 0x2e30985fUL, 0xc4c37c72UL, 0x02b40f0aUL, 0x82ef7f0fUL,
+0xadfd968cUL, 0xae2a2c5dUL, 0x499ae98eUL, 0xb888da50UL, 0xa0f42784UL, 0x9057ac1eUL, 0x49b46f79UL, 0x15dc5282UL,
+0x9b7dbdefUL, 0x7d5972a6UL, 0xd840a8adUL, 0x0445f545UL, 0x03745dfaUL, 0x05c33ee8UL, 0x1a75914fUL, 0xc2695692UL,
+0x41e9ef23UL, 0x2ef103a9UL, 0xf20d2760UL, 0xb6e47602UL, 0x7465fd94UL, 0xb2857992UL, 0xcbdb7682UL, 0x76817702UL,
+0x8d91aff8UL, 0x9ef7484eUL, 0xdf6d618fUL, 0x0e849de2UL, 0x837d2f84UL, 0xc8e50c34UL, 0x82b6bb96UL, 0x48b1b493UL,
+0xab3c30efUL, 0x28af4f98UL, 0x9baf9f77UL, 0x0d56dc92UL, 0x201e4d22UL, 0x88aa3784UL, 0x96dc297dUL, 0xdcd35627UL,
+0xee7c908bUL, 0x40d21fb5UL, 0xe37cc0e7UL, 0xa1b466e5UL, 0x5e61e9c3UL, 0x9d20f83cUL, 0xe3d19460UL, 0x41a39ccdUL,
+0x0e46765cUL, 0x3b98ea00UL, 0x8178d6d4UL, 0x2c5747fdUL, 0xd9ed6cf7UL, 0x9c22a8bdUL, 0xaaad7d12UL, 0x4e078a43UL,
+0x90c0971fUL, 0x8adb1b08UL, 0xbe7ea093UL, 0x15ca38b9UL, 0xff3cb097UL, 0xf8c0c23dUL, 0xecb21a8dUL, 0x510e3864UL,
+0xfb7bcc68UL, 0x88270fd9UL, 0x81014912UL, 0xd4ffe55dUL, 0x6af87eddUL, 0x14e2a276UL, 0x6803a4b9UL, 0x8f955d92UL,
+0xfaff394bUL, 0xe9ae39baUL, 0x0bd3ffa4UL, 0x3b93f7faUL, 0x2386496dUL, 0xfabc3c19UL, 0x45756227UL, 0x7af45c82UL,
+0xa08bbd61UL, 0xd1421ed1UL, 0xf404adceUL, 0x92a37e12UL, 0xb78d4210UL, 0x72a97282UL, 0xa8c47092UL, 0x0be57d12UL,
+0xc8a15b28UL, 0x4ff4623cUL, 0xa5eac035UL, 0x31d205e8UL, 0xfb298942UL, 0x82dffcb4UL, 0x536ab64fUL, 0x5bc17d0eUL,
+0xab1f081fUL, 0xae188610UL, 0x6d08fdfcUL, 0x8928fff9UL, 0x11cc4b69UL, 0xae5c6a23UL, 0x4dcade12UL, 0xc58c3f2cUL,
+0xfe2dd0d2UL, 0x9658eff8UL, 0xda52cfe4UL, 0x675b1595UL, 0x8c484a49UL, 0x0ca8b6b9UL, 0xbc828f5cUL, 0x456bd389UL,
+0x3794603aUL, 0xa9c900ecUL, 0x53527144UL, 0x494b870aUL, 0x40bc73d7UL, 0x1c67347cUL, 0xf67e7102UL, 0x3655eb4fUL,
+0xff2fd0a2UL, 0xc460bfd2UL, 0xc0033fd4UL, 0x6defb450UL, 0xd18c4707UL, 0x88186e00UL, 0x553fe5a2UL, 0xbcd4e6b9UL,
+0x168004a2UL, 0x33385797UL, 0x677d20d7UL, 0x3d8f0fdeUL, 0x337bf872UL, 0x334fccabUL, 0x5dc58876UL, 0xb0a6007bUL,
+0x01007b94UL, 0xd2750057UL, 0xf888bbf9UL, 0x9e014289UL, 0xffa56442UL, 0xe0026385UL, 0x2bd9db72UL, 0x691b97eeUL,
+0xde2fa26eUL, 0x2bae085fUL, 0x6d617aafUL, 0x6787c9e5UL, 0xd2eb1fcfUL, 0xc2c8ef61UL, 0x7125acf1UL, 0xc23982ccUL,
+0xb84c2167UL, 0xd183e5b1UL, 0x623edcb7UL, 0xcebd107fUL, 0x385c0af9UL, 0x3d44f00fUL, 0xc66d6e60UL, 0x493a5460UL,
+0x48c12757UL, 0x1d8ae92bUL, 0x3817b48aUL, 0x24bee120UL, 0x0fda96afUL, 0x25844568UL, 0xe53b8399UL, 0x7d450d60UL,
+0x50932f28UL, 0x62b33483UL, 0x20111dd9UL, 0xa08d6d2bUL, 0x311e2b64UL, 0x005a309cUL, 0x88e6bc52UL, 0x8a58031bUL,
+0xd5efbaf7UL, 0x9ced4241UL, 0x115c31a4UL, 0xc53e3283UL, 0x3646efdfUL, 0x01c533a1UL, 0x1c53d3e9UL, 0x833735eeUL};
+
+uint32_t s4[] PROGMEM = {
+0x2004b39dUL, 0xdee9b61fUL, 0xef7bbea7UL, 0x98a273d2UL, 0xdb7b4f4aUL, 0x578cad64UL, 0x43045185UL, 0xd10e02faUL,
+0xff7a287eUL, 0x63b60fe6UL, 0xa1355f09UL, 0x20f1eb79UL, 0x439d05fdUL, 0xb1b79764UL, 0x631f64f3UL, 0xdf4a1e24UL,
+0x5f7f1428UL, 0xcdb8a24fUL, 0x400043c9UL, 0x2022c30cUL, 0x300bd3fdUL, 0x4f37a5c0UL, 0xd9002d1dUL, 0x157b1424UL,
+0x1a114deeUL, 0x6751ca0fUL, 0x4c90ff71UL, 0xfe5f192dUL, 0x5f64051aUL, 0xfefe130cUL, 0xca081b08UL, 0x21011705UL,
+0x00015380UL, 0xfe5e3ee8UL, 0xf8f49aacUL, 0x0127e77fUL, 0x5feeb8d2UL, 0x6142df06UL, 0x8a9b9ebbUL, 0x25ea9372UL,
+0xdfff84ceUL, 0x018871f5UL, 0x044bd63dUL, 0x3b266fa2UL, 0x0084d47eUL, 0xe6eb7e54UL, 0xa04c6d44UL, 0xf5d6f36cUL,
+0xdfab4926UL, 0xf5c7a0aeUL, 0xc18c3336UL, 0x937e3f50UL, 0x612077d3UL, 0xe138b611UL, 0x030e5072UL, 0xbbb20ef8UL,
+0x2e50e0abUL, 0xde778decUL, 0x811e9757UL, 0x46674fe1UL, 0x005433c9UL, 0x8f312069UL, 0x99bb1d08UL, 0xa504c3ffUL,
+0x0518354dUL, 0xe35c3d7fUL, 0xc666c8a6UL, 0xa9cc5b5dUL, 0xea6fecdaUL, 0x916f929fUL, 0x2f22469fUL, 0x7d469139UL,
+0x8e6dbfa5UL, 0x4fc44311UL, 0x02839543UL, 0xeb4e21d0UL, 0xb8832002UL, 0x0c18b63fUL, 0x1e93f818UL, 0xe6581628UL,
+0x3e6e4826UL, 0x708ad78bUL, 0xc1e47774UL, 0x7ce006b5UL, 0x250a2df3UL, 0x028b0979UL, 0x81bbeae4UL, 0x233b1228UL,
+0x38adde69UL, 0x16ca7415UL, 0x621b87dfUL, 0xb7401c21UL, 0xf99e1aa5UL, 0x7b371400UL, 0xc88a1e04UL, 0x03401109UL,
+0xd2e459bdUL, 0xd556d1e3UL, 0xd576e84fUL, 0x40a3912fUL, 0xdee87b55UL, 0xa7e4ea00UL, 0xecc2e50cUL, 0xa6bbb44dUL,
+0xffbd56e7UL, 0xac6933ddUL, 0x35b017ecUL, 0x27235706UL, 0xb0c8af99UL, 0x91c3c856UL, 0x1c81656bUL, 0x1961145eUL,
+0x75cb856eUL, 0x02c007beUL, 0x775532c2UL, 0xecf43f89UL, 0x2dc9bf5bUL, 0x253becd0UL, 0xb71a80b7UL, 0x243b6d8dUL,
+0xef63c720UL, 0xfca566c3UL, 0x8028389cUL, 0x0532ce0aUL, 0x8a54c9aaUL, 0xc7d7a1ecUL, 0x32fa1a04UL, 0x5a62161dUL,
+0x2c900167UL, 0x547a759bUL, 0xf777d431UL, 0x31b02691UL, 0xdb6fcc36UL, 0x468b0bc7UL, 0x486ae6d9UL, 0x795ae556UL,
+0xeb4c6a02UL, 0xff7e4352UL, 0xb4768f2fUL, 0xa580f90dUL, 0xe3cd7486UL, 0xeb04daedUL, 0x04bea917UL, 0xdff4182cUL,
+0x9d7f74b7UL, 0xb4f72aabUL, 0x204dc3efUL, 0x7c6b092eUL, 0x54a24117UL, 0x35a0b6e5UL, 0xf6423d21UL, 0x267c1c2cUL,
+0x0ff5c261UL, 0xf9da5265UL, 0xf831c2d2UL, 0x690f1325UL, 0xa27f16d8UL, 0xc8f21804UL, 0xa6961a00UL, 0xab26150dUL,
+0x215c3163UL, 0xec720a5eUL, 0xfdfeba49UL, 0xd9087918UL, 0x86bd0d8dUL, 0xa7701131UL, 0x0c649b3eUL, 0xd7103eccUL,
+0xb6d3cad5UL, 0x88c3ae0cUL, 0xe10130f7UL, 0xff8a726cUL, 0xa1e2ea71UL, 0x6ef39a1fUL, 0x2fd1cbcfUL, 0x1784dec1UL,
+0x6bbe07acUL, 0xd8a144cbUL, 0x560f9b8bUL, 0xc3883901UL, 0xca2fc5b1UL, 0xcd31beb4UL, 0x062878d8UL, 0xe2a4a312UL,
+0x32e57d6fUL, 0xb67efd58UL, 0x00e91ed0UL, 0xc2ffad24UL, 0xc50f99f4UL, 0xc5aa1197UL, 0x957b1d00UL, 0xd2e7e582UL,
+0xf6739810UL, 0x96306100UL, 0x21952dc3UL, 0xff21a1adUL, 0x15849029UL, 0x7f97bb7fUL, 0xdbb39eafUL, 0x2aedc929UL,
+0x65a4e25cUL, 0x2cf330a7UL, 0xe83faad0UL, 0x91c05c8aUL, 0xe72c9ed4UL, 0xa954e40cUL, 0x86cd0ad6UL, 0x19195f01UL,
+0x03910777UL, 0xf63aa0deUL, 0x5e56a878UL, 0xdf56e3deUL, 0xbe5cf021UL, 0x87e3758bUL, 0x5106c5b3UL, 0xefc3a5b8UL,
+0xd2b6eed8UL, 0x77be23e5UL, 0x294515c2UL, 0xdfef692fUL, 0xfb7ae6afUL, 0xb2c470f4UL, 0x5bebe0f3UL, 0x7698ccd6UL,
+0x0c46e439UL, 0x3885da1fUL, 0x2f838719UL, 0x677300caUL, 0xf84491a9UL, 0x9e296b29UL, 0x95c22f49UL, 0xabbe6692UL,
+0x696e67b5UL, 0xdaddd39bUL, 0x2f057edfUL, 0x1c7025dbUL, 0xee515e1bUL, 0xe62453f6UL, 0x6ce3fc6aUL, 0x04cc1603UL,
+0x3e214486UL, 0xd059dcb7UL, 0x1f296579UL, 0x43fdd6ccUL, 0x79398241UL, 0xf6cd2b93UL, 0x4dc357b6UL, 0x82d2df4eUL,
+0x0c29e57aUL, 0x6b53b93cUL, 0xfe201e85UL, 0x7e553398UL, 0xb0f0ec13UL, 0x72b3ffd3UL, 0xc1c5853fUL, 0xd27eef0aUL};
+
+#endif
+
+/*********************************************************************************************************/
+
+#ifdef BIG_ENDIAN
+
+const uint32_t s5[] PROGMEM = { /* CAST5 S-box S5, BIG_ENDIAN word layout, 256 entries; const is required for PROGMEM data by current avr-gcc */
+0x7ec90c04UL, 0x2c6e74b9UL, 0x9b0e66dfUL, 0xa6337911UL, 0xb86a7fffUL, 0x1dd358f5UL, 0x44dd9d44UL, 0x1731167fUL,
+0x08fbf1faUL, 0xe7f511ccUL, 0xd2051b00UL, 0x735aba00UL, 0x2ab722d8UL, 0x386381cbUL, 0xacf6243aUL, 0x69befd7aUL,
+0xe6a2e77fUL, 0xf0c720cdUL, 0xc4494816UL, 0xccf5c180UL, 0x38851640UL, 0x15b0a848UL, 0xe68b18cbUL, 0x4caadeffUL,
+0x5f480a01UL, 0x0412b2aaUL, 0x259814fcUL, 0x41d0efe2UL, 0x4e40b48dUL, 0x248eb6fbUL, 0x8dba1cfeUL, 0x41a99b02UL,
+0x1a550a04UL, 0xba8f65cbUL, 0x7251f4e7UL, 0x95a51725UL, 0xc106ecd7UL, 0x97a5980aUL, 0xc539b9aaUL, 0x4d79fe6aUL,
+0xf2f3f763UL, 0x68af8040UL, 0xed0c9e56UL, 0x11b4958bUL, 0xe1eb5a88UL, 0x8709e6b0UL, 0xd7e07156UL, 0x4e29fea7UL,
+0x6366e52dUL, 0x02d1c000UL, 0xc4ac8e05UL, 0x9377f571UL, 0x0c05372aUL, 0x578535f2UL, 0x2261be02UL, 0xd642a0c9UL,
+0xdf13a280UL, 0x74b55bd2UL, 0x682199c0UL, 0xd421e5ecUL, 0x53fb3ce8UL, 0xc8adedb3UL, 0x28a87fc9UL, 0x3d959981UL,
+0x5c1ff900UL, 0xfe38d399UL, 0x0c4eff0bUL, 0x062407eaUL, 0xaa2f4fb1UL, 0x4fb96976UL, 0x90c79505UL, 0xb0a8a774UL,
+0xef55a1ffUL, 0xe59ca2c2UL, 0xa6b62d27UL, 0xe66a4263UL, 0xdf65001fUL, 0x0ec50966UL, 0xdfdd55bcUL, 0x29de0655UL,
+0x911e739aUL, 0x17af8975UL, 0x32c7911cUL, 0x89f89468UL, 0x0d01e980UL, 0x524755f4UL, 0x03b63cc9UL, 0x0cc844b2UL,
+0xbcf3f0aaUL, 0x87ac36e9UL, 0xe53a7426UL, 0x01b3d82bUL, 0x1a9e7449UL, 0x64ee2d7eUL, 0xcddbb1daUL, 0x01c94910UL,
+0xb868bf80UL, 0x0d26f3fdUL, 0x9342ede7UL, 0x04a5c284UL, 0x636737b6UL, 0x50f5b616UL, 0xf24766e3UL, 0x8eca36c1UL,
+0x136e05dbUL, 0xfef18391UL, 0xfb887a37UL, 0xd6e7f7d4UL, 0xc7fb7dc9UL, 0x3063fcdfUL, 0xb6f589deUL, 0xec2941daUL,
+0x26e46695UL, 0xb7566419UL, 0xf654efc5UL, 0xd08d58b7UL, 0x48925401UL, 0xc1bacb7fUL, 0xe5ff550fUL, 0xb6083049UL,
+0x5bb5d0e8UL, 0x87d72e5aUL, 0xab6a6ee1UL, 0x223a66ceUL, 0xc62bf3cdUL, 0x9e0885f9UL, 0x68cb3e47UL, 0x086c010fUL,
+0xa21de820UL, 0xd18b69deUL, 0xf3f65777UL, 0xfa02c3f6UL, 0x407edac3UL, 0xcbb3d550UL, 0x1793084dUL, 0xb0d70ebaUL,
+0x0ab378d5UL, 0xd951fb0cUL, 0xded7da56UL, 0x4124bbe4UL, 0x94ca0b56UL, 0x0f5755d1UL, 0xe0e1e56eUL, 0x6184b5beUL,
+0x580a249fUL, 0x94f74bc0UL, 0xe327888eUL, 0x9f7b5561UL, 0xc3dc0280UL, 0x05687715UL, 0x646c6bd7UL, 0x44904db3UL,
+0x66b4f0a3UL, 0xc0f1648aUL, 0x697ed5afUL, 0x49e92ff6UL, 0x309e374fUL, 0x2cb6356aUL, 0x85808573UL, 0x4991f840UL,
+0x76f0ae02UL, 0x083be84dUL, 0x28421c9aUL, 0x44489406UL, 0x736e4cb8UL, 0xc1092910UL, 0x8bc95fc6UL, 0x7d869cf4UL,
+0x134f616fUL, 0x2e77118dUL, 0xb31b2be1UL, 0xaa90b472UL, 0x3ca5d717UL, 0x7d161bbaUL, 0x9cad9010UL, 0xaf462ba2UL,
+0x9fe459d2UL, 0x45d34559UL, 0xd9f2da13UL, 0xdbc65487UL, 0xf3e4f94eUL, 0x176d486fUL, 0x097c13eaUL, 0x631da5c7UL,
+0x445f7382UL, 0x175683f4UL, 0xcdc66a97UL, 0x70be0288UL, 0xb3cdcf72UL, 0x6e5dd2f3UL, 0x20936079UL, 0x459b80a5UL,
+0xbe60e2dbUL, 0xa9c23101UL, 0xeba5315cUL, 0x224e42f2UL, 0x1c5c1572UL, 0xf6721b2cUL, 0x1ad2fff3UL, 0x8c25404eUL,
+0x324ed72fUL, 0x4067b7fdUL, 0x0523138eUL, 0x5ca3bc78UL, 0xdc0fd66eUL, 0x75922283UL, 0x784d6b17UL, 0x58ebb16eUL,
+0x44094f85UL, 0x3f481d87UL, 0xfcfeae7bUL, 0x77b5ff76UL, 0x8c2302bfUL, 0xaaf47556UL, 0x5f46b02aUL, 0x2b092801UL,
+0x3d38f5f7UL, 0x0ca81f36UL, 0x52af4a8aUL, 0x66d5e7c0UL, 0xdf3b0874UL, 0x95055110UL, 0x1b5ad7a8UL, 0xf61ed5adUL,
+0x6cf6e479UL, 0x20758184UL, 0xd0cefa65UL, 0x88f7be58UL, 0x4a046826UL, 0x0ff6f8f3UL, 0xa09c7f70UL, 0x5346aba0UL,
+0x5ce96c28UL, 0xe176eda3UL, 0x6bac307fUL, 0x376829d2UL, 0x85360fa9UL, 0x17e3fe2aUL, 0x24b79767UL, 0xf5a96b20UL,
+0xd6cd2595UL, 0x68ff1ebfUL, 0x7555442cUL, 0xf19f06beUL, 0xf9e0659aUL, 0xeeb9491dUL, 0x34010718UL, 0xbb30cab8UL,
+0xe822fe15UL, 0x88570983UL, 0x750e6249UL, 0xda627e55UL, 0x5e76ffa8UL, 0xb1534546UL, 0x6d47de08UL, 0xefe9e7d4UL};
+
+
+const uint32_t s6[] PROGMEM = { /* CAST5 S-box S6, BIG_ENDIAN word layout, 256 entries; const is required for PROGMEM data by current avr-gcc */
+0xf6fa8f9dUL, 0x2cac6ce1UL, 0x4ca34867UL, 0xe2337f7cUL, 0x95db08e7UL, 0x016843b4UL, 0xeced5cbcUL, 0x325553acUL,
+0xbf9f0960UL, 0xdfa1e2edUL, 0x83f0579dUL, 0x63ed86b9UL, 0x1ab6a6b8UL, 0xde5ebe39UL, 0xf38ff732UL, 0x8989b138UL,
+0x33f14961UL, 0xc01937bdUL, 0xf506c6daUL, 0xe4625e7eUL, 0xa308ea99UL, 0x4e23e33cUL, 0x79cbd7ccUL, 0x48a14367UL,
+0xa3149619UL, 0xfec94bd5UL, 0xa114174aUL, 0xeaa01866UL, 0xa084db2dUL, 0x09a8486fUL, 0xa888614aUL, 0x2900af98UL,
+0x01665991UL, 0xe1992863UL, 0xc8f30c60UL, 0x2e78ef3cUL, 0xd0d51932UL, 0xcf0fec14UL, 0xf7ca07d2UL, 0xd0a82072UL,
+0xfd41197eUL, 0x9305a6b0UL, 0xe86be3daUL, 0x74bed3cdUL, 0x372da53cUL, 0x4c7f4448UL, 0xdab5d440UL, 0x6dba0ec3UL,
+0x083919a7UL, 0x9fbaeed9UL, 0x49dbcfb0UL, 0x4e670c53UL, 0x5c3d9c01UL, 0x64bdb941UL, 0x2c0e636aUL, 0xba7dd9cdUL,
+0xea6f7388UL, 0xe70bc762UL, 0x35f29adbUL, 0x5c4cdd8dUL, 0xf0d48d8cUL, 0xb88153e2UL, 0x08a19866UL, 0x1ae2eac8UL,
+0x284caf89UL, 0xaa928223UL, 0x9334be53UL, 0x3b3a21bfUL, 0x16434be3UL, 0x9aea3906UL, 0xefe8c36eUL, 0xf890cdd9UL,
+0x80226daeUL, 0xc340a4a3UL, 0xdf7e9c09UL, 0xa694a807UL, 0x5b7c5eccUL, 0x221db3a6UL, 0x9a69a02fUL, 0x68818a54UL,
+0xceb2296fUL, 0x53c0843aUL, 0xfe893655UL, 0x25bfe68aUL, 0xb4628abcUL, 0xcf222ebfUL, 0x25ac6f48UL, 0xa9a99387UL,
+0x53bddb65UL, 0xe76ffbe7UL, 0xe967fd78UL, 0x0ba93563UL, 0x8e342bc1UL, 0xe8a11be9UL, 0x4980740dUL, 0xc8087dfcUL,
+0x8de4bf99UL, 0xa11101a0UL, 0x7fd37975UL, 0xda5a26c0UL, 0xe81f994fUL, 0x9528cd89UL, 0xfd339fedUL, 0xb87834bfUL,
+0x5f04456dUL, 0x22258698UL, 0xc9c4c83bUL, 0x2dc156beUL, 0x4f628daaUL, 0x57f55ec5UL, 0xe2220abeUL, 0xd2916ebfUL,
+0x4ec75b95UL, 0x24f2c3c0UL, 0x42d15d99UL, 0xcd0d7fa0UL, 0x7b6e27ffUL, 0xa8dc8af0UL, 0x7345c106UL, 0xf41e232fUL,
+0x35162386UL, 0xe6ea8926UL, 0x3333b094UL, 0x157ec6f2UL, 0x372b74afUL, 0x692573e4UL, 0xe9a9d848UL, 0xf3160289UL,
+0x3a62ef1dUL, 0xa787e238UL, 0xf3a5f676UL, 0x74364853UL, 0x20951063UL, 0x4576698dUL, 0xb6fad407UL, 0x592af950UL,
+0x36f73523UL, 0x4cfb6e87UL, 0x7da4cec0UL, 0x6c152daaUL, 0xcb0396a8UL, 0xc50dfe5dUL, 0xfcd707abUL, 0x0921c42fUL,
+0x89dff0bbUL, 0x5fe2be78UL, 0x448f4f33UL, 0x754613c9UL, 0x2b05d08dUL, 0x48b9d585UL, 0xdc049441UL, 0xc8098f9bUL,
+0x7dede786UL, 0xc39a3373UL, 0x42410005UL, 0x6a091751UL, 0x0ef3c8a6UL, 0x890072d6UL, 0x28207682UL, 0xa9a9f7beUL,
+0xbf32679dUL, 0xd45b5b75UL, 0xb353fd00UL, 0xcbb0e358UL, 0x830f220aUL, 0x1f8fb214UL, 0xd372cf08UL, 0xcc3c4a13UL,
+0x8cf63166UL, 0x061c87beUL, 0x88c98f88UL, 0x6062e397UL, 0x47cf8e7aUL, 0xb6c85283UL, 0x3cc2acfbUL, 0x3fc06976UL,
+0x4e8f0252UL, 0x64d8314dUL, 0xda3870e3UL, 0x1e665459UL, 0xc10908f0UL, 0x513021a5UL, 0x6c5b68b7UL, 0x822f8aa0UL,
+0x3007cd3eUL, 0x74719eefUL, 0xdc872681UL, 0x073340d4UL, 0x7e432fd9UL, 0x0c5ec241UL, 0x8809286cUL, 0xf592d891UL,
+0x08a930f6UL, 0x957ef305UL, 0xb7fbffbdUL, 0xc266e96fUL, 0x6fe4ac98UL, 0xb173ecc0UL, 0xbc60b42aUL, 0x953498daUL,
+0xfba1ae12UL, 0x2d4bd736UL, 0x0f25faabUL, 0xa4f3fcebUL, 0xe2969123UL, 0x257f0c3dUL, 0x9348af49UL, 0x361400bcUL,
+0xe8816f4aUL, 0x3814f200UL, 0xa3f94043UL, 0x9c7a54c2UL, 0xbc704f57UL, 0xda41e7f9UL, 0xc25ad33aUL, 0x54f4a084UL,
+0xb17f5505UL, 0x59357cbeUL, 0xedbd15c8UL, 0x7f97c5abUL, 0xba5ac7b5UL, 0xb6f6deafUL, 0x3a479c3aUL, 0x5302da25UL,
+0x653d7e6aUL, 0x54268d49UL, 0x51a477eaUL, 0x5017d55bUL, 0xd7d25d88UL, 0x44136c76UL, 0x0404a8c8UL, 0xb8e5a121UL,
+0xb81a928aUL, 0x60ed5869UL, 0x97c55b96UL, 0xeaec991bUL, 0x29935913UL, 0x01fdb7f1UL, 0x088e8dfaUL, 0x9ab6f6f5UL,
+0x3b4cbf9fUL, 0x4a5de3abUL, 0xe6051d35UL, 0xa0e1d855UL, 0xd36b4cf1UL, 0xf544edebUL, 0xb0e93524UL, 0xbebb8fbdUL,
+0xa2d762cfUL, 0x49c92f54UL, 0x38b5f331UL, 0x7128a454UL, 0x48392905UL, 0xa65b1db8UL, 0x851c97bdUL, 0xd675cf2fUL};
+
+
+const uint32_t s7[] PROGMEM = { /* CAST5 S-box S7, BIG_ENDIAN word layout, 256 entries; const is required for PROGMEM data by current avr-gcc */
+0x85e04019UL, 0x332bf567UL, 0x662dbfffUL, 0xcfc65693UL, 0x2a8d7f6fUL, 0xab9bc912UL, 0xde6008a1UL, 0x2028da1fUL,
+0x0227bce7UL, 0x4d642916UL, 0x18fac300UL, 0x50f18b82UL, 0x2cb2cb11UL, 0xb232e75cUL, 0x4b3695f2UL, 0xb28707deUL,
+0xa05fbcf6UL, 0xcd4181e9UL, 0xe150210cUL, 0xe24ef1bdUL, 0xb168c381UL, 0xfde4e789UL, 0x5c79b0d8UL, 0x1e8bfd43UL,
+0x4d495001UL, 0x38be4341UL, 0x913cee1dUL, 0x92a79c3fUL, 0x089766beUL, 0xbaeeadf4UL, 0x1286becfUL, 0xb6eacb19UL,
+0x2660c200UL, 0x7565bde4UL, 0x64241f7aUL, 0x8248dca9UL, 0xc3b3ad66UL, 0x28136086UL, 0x0bd8dfa8UL, 0x356d1cf2UL,
+0x107789beUL, 0xb3b2e9ceUL, 0x0502aa8fUL, 0x0bc0351eUL, 0x166bf52aUL, 0xeb12ff82UL, 0xe3486911UL, 0xd34d7516UL,
+0x4e7b3affUL, 0x5f43671bUL, 0x9cf6e037UL, 0x4981ac83UL, 0x334266ceUL, 0x8c9341b7UL, 0xd0d854c0UL, 0xcb3a6c88UL,
+0x47bc2829UL, 0x4725ba37UL, 0xa66ad22bUL, 0x7ad61f1eUL, 0x0c5cbafaUL, 0x4437f107UL, 0xb6e79962UL, 0x42d2d816UL,
+0x0a961288UL, 0xe1a5c06eUL, 0x13749e67UL, 0x72fc081aUL, 0xb1d139f7UL, 0xf9583745UL, 0xcf19df58UL, 0xbec3f756UL,
+0xc06eba30UL, 0x07211b24UL, 0x45c28829UL, 0xc95e317fUL, 0xbc8ec511UL, 0x38bc46e9UL, 0xc6e6fa14UL, 0xbae8584aUL,
+0xad4ebc46UL, 0x468f508bUL, 0x7829435fUL, 0xf124183bUL, 0x821dba9fUL, 0xaff60ff4UL, 0xea2c4e6dUL, 0x16e39264UL,
+0x92544a8bUL, 0x009b4fc3UL, 0xaba68cedUL, 0x9ac96f78UL, 0x06a5b79aUL, 0xb2856e6eUL, 0x1aec3ca9UL, 0xbe838688UL,
+0x0e0804e9UL, 0x55f1be56UL, 0xe7e5363bUL, 0xb3a1f25dUL, 0xf7debb85UL, 0x61fe033cUL, 0x16746233UL, 0x3c034c28UL,
+0xda6d0c74UL, 0x79aac56cUL, 0x3ce4e1adUL, 0x51f0c802UL, 0x98f8f35aUL, 0x1626a49fUL, 0xeed82b29UL, 0x1d382fe3UL,
+0x0c4fb99aUL, 0xbb325778UL, 0x3ec6d97bUL, 0x6e77a6a9UL, 0xcb658b5cUL, 0xd45230c7UL, 0x2bd1408bUL, 0x60c03eb7UL,
+0xb9068d78UL, 0xa33754f4UL, 0xf430c87dUL, 0xc8a71302UL, 0xb96d8c32UL, 0xebd4e7beUL, 0xbe8b9d2dUL, 0x7979fb06UL,
+0xe7225308UL, 0x8b75cf77UL, 0x11ef8da4UL, 0xe083c858UL, 0x8d6b786fUL, 0x5a6317a6UL, 0xfa5cf7a0UL, 0x5dda0033UL,
+0xf28ebfb0UL, 0xf5b9c310UL, 0xa0eac280UL, 0x08b9767aUL, 0xa3d9d2b0UL, 0x79d34217UL, 0x021a718dUL, 0x9ac6336aUL,
+0x2711fd60UL, 0x438050e3UL, 0x069908a8UL, 0x3d7fedc4UL, 0x826d2befUL, 0x4eeb8476UL, 0x488dcf25UL, 0x36c9d566UL,
+0x28e74e41UL, 0xc2610acaUL, 0x3d49a9cfUL, 0xbae3b9dfUL, 0xb65f8de6UL, 0x92aeaf64UL, 0x3ac7d5e6UL, 0x9ea80509UL,
+0xf22b017dUL, 0xa4173f70UL, 0xdd1e16c3UL, 0x15e0d7f9UL, 0x50b1b887UL, 0x2b9f4fd5UL, 0x625aba82UL, 0x6a017962UL,
+0x2ec01b9cUL, 0x15488aa9UL, 0xd716e740UL, 0x40055a2cUL, 0x93d29a22UL, 0xe32dbf9aUL, 0x058745b9UL, 0x3453dc1eUL,
+0xd699296eUL, 0x496cff6fUL, 0x1c9f4986UL, 0xdfe2ed07UL, 0xb87242d1UL, 0x19de7eaeUL, 0x053e561aUL, 0x15ad6f8cUL,
+0x66626c1cUL, 0x7154c24cUL, 0xea082b2aUL, 0x93eb2939UL, 0x17dcb0f0UL, 0x58d4f2aeUL, 0x9ea294fbUL, 0x52cf564cUL,
+0x9883fe66UL, 0x2ec40581UL, 0x763953c3UL, 0x01d6692eUL, 0xd3a0c108UL, 0xa1e7160eUL, 0xe4f2dfa6UL, 0x693ed285UL,
+0x74904698UL, 0x4c2b0eddUL, 0x4f757656UL, 0x5d393378UL, 0xa132234fUL, 0x3d321c5dUL, 0xc3f5e194UL, 0x4b269301UL,
+0xc79f022fUL, 0x3c997e7eUL, 0x5e4f9504UL, 0x3ffafbbdUL, 0x76f7ad0eUL, 0x296693f4UL, 0x3d1fce6fUL, 0xc61e45beUL,
+0xd3b5ab34UL, 0xf72bf9b7UL, 0x1b0434c0UL, 0x4e72b567UL, 0x5592a33dUL, 0xb5229301UL, 0xcfd2a87fUL, 0x60aeb767UL,
+0x1814386bUL, 0x30bcc33dUL, 0x38a0c07dUL, 0xfd1606f2UL, 0xc363519bUL, 0x589dd390UL, 0x5479f8e6UL, 0x1cb8d647UL,
+0x97fd61a9UL, 0xea7759f4UL, 0x2d57539dUL, 0x569a58cfUL, 0xe84e63adUL, 0x462e1b78UL, 0x6580f87eUL, 0xf3817914UL,
+0x91da55f4UL, 0x40a230f3UL, 0xd1988f35UL, 0xb6e318d2UL, 0x3ffa50bcUL, 0x3d40f021UL, 0xc3c0bdaeUL, 0x4958c24cUL,
+0x518f36b2UL, 0x84b1d370UL, 0x0fedce83UL, 0x878ddadaUL, 0xf2a279c7UL, 0x94e01be8UL, 0x90716f4bUL, 0x954b8aa3UL};
+
+
+const uint32_t s8[] PROGMEM = { /* CAST5 S-box S8, BIG_ENDIAN word layout, 256 entries; const is required for PROGMEM data by current avr-gcc */
+0xe216300dUL, 0xbbddfffcUL, 0xa7ebdabdUL, 0x35648095UL, 0x7789f8b7UL, 0xe6c1121bUL, 0x0e241600UL, 0x052ce8b5UL,
+0x11a9cfb0UL, 0xe5952f11UL, 0xece7990aUL, 0x9386d174UL, 0x2a42931cUL, 0x76e38111UL, 0xb12def3aUL, 0x37ddddfcUL,
+0xde9adeb1UL, 0x0a0cc32cUL, 0xbe197029UL, 0x84a00940UL, 0xbb243a0fUL, 0xb4d137cfUL, 0xb44e79f0UL, 0x049eedfdUL,
+0x0b15a15dUL, 0x480d3168UL, 0x8bbbde5aUL, 0x669ded42UL, 0xc7ece831UL, 0x3f8f95e7UL, 0x72df191bUL, 0x7580330dUL,
+0x94074251UL, 0x5c7dcdfaUL, 0xabbe6d63UL, 0xaa402164UL, 0xb301d40aUL, 0x02e7d1caUL, 0x53571daeUL, 0x7a3182a2UL,
+0x12a8ddecUL, 0xfdaa335dUL, 0x176f43e8UL, 0x71fb46d4UL, 0x38129022UL, 0xce949ad4UL, 0xb84769adUL, 0x965bd862UL,
+0x82f3d055UL, 0x66fb9767UL, 0x15b80b4eUL, 0x1d5b47a0UL, 0x4cfde06fUL, 0xc28ec4b8UL, 0x57e8726eUL, 0x647a78fcUL,
+0x99865d44UL, 0x608bd593UL, 0x6c200e03UL, 0x39dc5ff6UL, 0x5d0b00a3UL, 0xae63aff2UL, 0x7e8bd632UL, 0x70108c0cUL,
+0xbbd35049UL, 0x2998df04UL, 0x980cf42aUL, 0x9b6df491UL, 0x9e7edd53UL, 0x06918548UL, 0x58cb7e07UL, 0x3b74ef2eUL,
+0x522fffb1UL, 0xd24708ccUL, 0x1c7e27cdUL, 0xa4eb215bUL, 0x3cf1d2e2UL, 0x19b47a38UL, 0x424f7618UL, 0x35856039UL,
+0x9d17dee7UL, 0x27eb35e6UL, 0xc9aff67bUL, 0x36baf5b8UL, 0x09c467cdUL, 0xc18910b1UL, 0xe11dbf7bUL, 0x06cd1af8UL,
+0x7170c608UL, 0x2d5e3354UL, 0xd4de495aUL, 0x64c6d006UL, 0xbcc0c62cUL, 0x3dd00db3UL, 0x708f8f34UL, 0x77d51b42UL,
+0x264f620fUL, 0x24b8d2bfUL, 0x15c1b79eUL, 0x46a52564UL, 0xf8d7e54eUL, 0x3e378160UL, 0x7895cda5UL, 0x859c15a5UL,
+0xe6459788UL, 0xc37bc75fUL, 0xdb07ba0cUL, 0x0676a3abUL, 0x7f229b1eUL, 0x31842e7bUL, 0x24259fd7UL, 0xf8bef472UL,
+0x835ffcb8UL, 0x6df4c1f2UL, 0x96f5b195UL, 0xfd0af0fcUL, 0xb0fe134cUL, 0xe2506d3dUL, 0x4f9b12eaUL, 0xf215f225UL,
+0xa223736fUL, 0x9fb4c428UL, 0x25d04979UL, 0x34c713f8UL, 0xc4618187UL, 0xea7a6e98UL, 0x7cd16efcUL, 0x1436876cUL,
+0xf1544107UL, 0xbedeee14UL, 0x56e9af27UL, 0xa04aa441UL, 0x3cf7c899UL, 0x92ecbae6UL, 0xdd67016dUL, 0x151682ebUL,
+0xa842eedfUL, 0xfdba60b4UL, 0xf1907b75UL, 0x20e3030fUL, 0x24d8c29eUL, 0xe139673bUL, 0xefa63fb8UL, 0x71873054UL,
+0xb6f2cf3bUL, 0x9f326442UL, 0xcb15a4ccUL, 0xb01a4504UL, 0xf1e47d8dUL, 0x844a1be5UL, 0xbae7dfdcUL, 0x42cbda70UL,
+0xcd7dae0aUL, 0x57e85b7aUL, 0xd53f5af6UL, 0x20cf4d8cUL, 0xcea4d428UL, 0x79d130a4UL, 0x3486ebfbUL, 0x33d3cddcUL,
+0x77853b53UL, 0x37effcb5UL, 0xc5068778UL, 0xe580b3e6UL, 0x4e68b8f4UL, 0xc5c8b37eUL, 0x0d809ea2UL, 0x398feb7cUL,
+0x132a4f94UL, 0x43b7950eUL, 0x2fee7d1cUL, 0x223613bdUL, 0xdd06caa2UL, 0x37df932bUL, 0xc4248289UL, 0xacf3ebc3UL,
+0x5715f6b7UL, 0xef3478ddUL, 0xf267616fUL, 0xc148cbe4UL, 0x9052815eUL, 0x5e410fabUL, 0xb48a2465UL, 0x2eda7fa4UL,
+0xe87b40e4UL, 0xe98ea084UL, 0x5889e9e1UL, 0xefd390fcUL, 0xdd07d35bUL, 0xdb485694UL, 0x38d7e5b2UL, 0x57720101UL,
+0x730edebcUL, 0x5b643113UL, 0x94917e4fUL, 0x503c2fbaUL, 0x646f1282UL, 0x7523d24aUL, 0xe0779695UL, 0xf9c17a8fUL,
+0x7a5b2121UL, 0xd187b896UL, 0x29263a4dUL, 0xba510cdfUL, 0x81f47c9fUL, 0xad1163edUL, 0xea7b5965UL, 0x1a00726eUL,
+0x11403092UL, 0x00da6d77UL, 0x4a0cdd61UL, 0xad1f4603UL, 0x605bdfb0UL, 0x9eedc364UL, 0x22ebe6a8UL, 0xcee7d28aUL,
+0xa0e736a0UL, 0x5564a6b9UL, 0x10853209UL, 0xc7eb8f37UL, 0x2de705caUL, 0x8951570fUL, 0xdf09822bUL, 0xbd691a6cUL,
+0xaa12e4f2UL, 0x87451c0fUL, 0xe0f6a27aUL, 0x3ada4819UL, 0x4cf1764fUL, 0x0d771c2bUL, 0x67cdb156UL, 0x350d8384UL,
+0x5938fa0fUL, 0x42399ef3UL, 0x36997b07UL, 0x0e84093dUL, 0x4aa93e61UL, 0x8360d87bUL, 0x1fa98b0cUL, 0x1149382cUL,
+0xe97625a5UL, 0x0614d1b7UL, 0x0e25244bUL, 0x0c768347UL, 0x589e8d82UL, 0x0d2059d1UL, 0xa466bb1eUL, 0xf8da0a82UL,
+0x04f19130UL, 0xba6e4ec0UL, 0x99265164UL, 0x1ee7230dUL, 0x50b2ad80UL, 0xeaee6801UL, 0x8db2a283UL, 0xea8bf59eUL};
+
+#else
+
+const uint32_t s5[] PROGMEM = { /* CAST5 S-box S5, 256 entries, words byte-swapped relative to the BIG_ENDIAN variant; const is required for PROGMEM data by current avr-gcc */
+0x040cc97eUL, 0xb9746e2cUL, 0xdf660e9bUL, 0x117933a6UL, 0xff7f6ab8UL, 0xf558d31dUL, 0x449ddd44UL, 0x7f163117UL,
+0xfaf1fb08UL, 0xcc11f5e7UL, 0x001b05d2UL, 0x00ba5a73UL, 0xd822b72aUL, 0xcb816338UL, 0x3a24f6acUL, 0x7afdbe69UL,
+0x7fe7a2e6UL, 0xcd20c7f0UL, 0x164849c4UL, 0x80c1f5ccUL, 0x40168538UL, 0x48a8b015UL, 0xcb188be6UL, 0xffdeaa4cUL,
+0x010a485fUL, 0xaab21204UL, 0xfc149825UL, 0xe2efd041UL, 0x8db4404eUL, 0xfbb68e24UL, 0xfe1cba8dUL, 0x029ba941UL,
+0x040a551aUL, 0xcb658fbaUL, 0xe7f45172UL, 0x2517a595UL, 0xd7ec06c1UL, 0x0a98a597UL, 0xaab939c5UL, 0x6afe794dUL,
+0x63f7f3f2UL, 0x4080af68UL, 0x569e0cedUL, 0x8b95b411UL, 0x885aebe1UL, 0xb0e60987UL, 0x5671e0d7UL, 0xa7fe294eUL,
+0x2de56663UL, 0x00c0d102UL, 0x058eacc4UL, 0x71f57793UL, 0x2a37050cUL, 0xf2358557UL, 0x02be6122UL, 0xc9a042d6UL,
+0x80a213dfUL, 0xd25bb574UL, 0xc0992168UL, 0xece521d4UL, 0xe83cfb53UL, 0xb3edadc8UL, 0xc97fa828UL, 0x8199953dUL,
+0x00f91f5cUL, 0x99d338feUL, 0x0bff4e0cUL, 0xea072406UL, 0xb14f2faaUL, 0x7669b94fUL, 0x0595c790UL, 0x74a7a8b0UL,
+0xffa155efUL, 0xc2a29ce5UL, 0x272db6a6UL, 0x63426ae6UL, 0x1f0065dfUL, 0x6609c50eUL, 0xbc55dddfUL, 0x5506de29UL,
+0x9a731e91UL, 0x7589af17UL, 0x1c91c732UL, 0x6894f889UL, 0x80e9010dUL, 0xf4554752UL, 0xc93cb603UL, 0xb244c80cUL,
+0xaaf0f3bcUL, 0xe936ac87UL, 0x26743ae5UL, 0x2bd8b301UL, 0x49749e1aUL, 0x7e2dee64UL, 0xdab1dbcdUL, 0x1049c901UL,
+0x80bf68b8UL, 0xfdf3260dUL, 0xe7ed4293UL, 0x84c2a504UL, 0xb6376763UL, 0x16b6f550UL, 0xe36647f2UL, 0xc136ca8eUL,
+0xdb056e13UL, 0x9183f1feUL, 0x377a88fbUL, 0xd4f7e7d6UL, 0xc97dfbc7UL, 0xdffc6330UL, 0xde89f5b6UL, 0xda4129ecUL,
+0x9566e426UL, 0x196456b7UL, 0xc5ef54f6UL, 0xb7588dd0UL, 0x01549248UL, 0x7fcbbac1UL, 0x0f55ffe5UL, 0x493008b6UL,
+0xe8d0b55bUL, 0x5a2ed787UL, 0xe16e6aabUL, 0xce663a22UL, 0xcdf32bc6UL, 0xf985089eUL, 0x473ecb68UL, 0x0f016c08UL,
+0x20e81da2UL, 0xde698bd1UL, 0x7757f6f3UL, 0xf6c302faUL, 0xc3da7e40UL, 0x50d5b3cbUL, 0x4d089317UL, 0xba0ed7b0UL,
+0xd578b30aUL, 0x0cfb51d9UL, 0x56dad7deUL, 0xe4bb2441UL, 0x560bca94UL, 0xd155570fUL, 0x6ee5e1e0UL, 0xbeb58461UL,
+0x9f240a58UL, 0xc04bf794UL, 0x8e8827e3UL, 0x61557b9fUL, 0x8002dcc3UL, 0x15776805UL, 0xd76b6c64UL, 0xb34d9044UL,
+0xa3f0b466UL, 0x8a64f1c0UL, 0xafd57e69UL, 0xf62fe949UL, 0x4f379e30UL, 0x6a35b62cUL, 0x73858085UL, 0x40f89149UL,
+0x02aef076UL, 0x4de83b08UL, 0x9a1c4228UL, 0x06944844UL, 0xb84c6e73UL, 0x102909c1UL, 0xc65fc98bUL, 0xf49c867dUL,
+0x6f614f13UL, 0x8d11772eUL, 0xe12b1bb3UL, 0x72b490aaUL, 0x17d7a53cUL, 0xba1b167dUL, 0x1090ad9cUL, 0xa22b46afUL,
+0xd259e49fUL, 0x5945d345UL, 0x13daf2d9UL, 0x8754c6dbUL, 0x4ef9e4f3UL, 0x6f486d17UL, 0xea137c09UL, 0xc7a51d63UL,
+0x82735f44UL, 0xf4835617UL, 0x976ac6cdUL, 0x8802be70UL, 0x72cfcdb3UL, 0xf3d25d6eUL, 0x79609320UL, 0xa5809b45UL,
+0xdbe260beUL, 0x0131c2a9UL, 0x5c31a5ebUL, 0xf2424e22UL, 0x72155c1cUL, 0x2c1b72f6UL, 0xf3ffd21aUL, 0x4e40258cUL,
+0x2fd74e32UL, 0xfdb76740UL, 0x8e132305UL, 0x78bca35cUL, 0x6ed60fdcUL, 0x83229275UL, 0x176b4d78UL, 0x6eb1eb58UL,
+0x854f0944UL, 0x871d483fUL, 0x7baefefcUL, 0x76ffb577UL, 0xbf02238cUL, 0x5675f4aaUL, 0x2ab0465fUL, 0x0128092bUL,
+0xf7f5383dUL, 0x361fa80cUL, 0x8a4aaf52UL, 0xc0e7d566UL, 0x74083bdfUL, 0x10510595UL, 0xa8d75a1bUL, 0xadd51ef6UL,
+0x79e4f66cUL, 0x84817520UL, 0x65faced0UL, 0x58bef788UL, 0x2668044aUL, 0xf3f8f60fUL, 0x707f9ca0UL, 0xa0ab4653UL,
+0x286ce95cUL, 0xa3ed76e1UL, 0x7f30ac6bUL, 0xd2296837UL, 0xa90f3685UL, 0x2afee317UL, 0x6797b724UL, 0x206ba9f5UL,
+0x9525cdd6UL, 0xbf1eff68UL, 0x2c445575UL, 0xbe069ff1UL, 0x9a65e0f9UL, 0x1d49b9eeUL, 0x18070134UL, 0xb8ca30bbUL,
+0x15fe22e8UL, 0x83095788UL, 0x49620e75UL, 0x557e62daUL, 0xa8ff765eUL, 0x464553b1UL, 0x08de476dUL, 0xd4e7e9efUL};
+
+
+const uint32_t s6[] PROGMEM = { /* CAST5 S-box S6, 256 entries, words byte-swapped relative to the BIG_ENDIAN variant; const is required for PROGMEM data by current avr-gcc */
+0x9d8ffaf6UL, 0xe16cac2cUL, 0x6748a34cUL, 0x7c7f33e2UL, 0xe708db95UL, 0xb4436801UL, 0xbc5cedecUL, 0xac535532UL,
+0x60099fbfUL, 0xede2a1dfUL, 0x9d57f083UL, 0xb986ed63UL, 0xb8a6b61aUL, 0x39be5edeUL, 0x32f78ff3UL, 0x38b18989UL,
+0x6149f133UL, 0xbd3719c0UL, 0xdac606f5UL, 0x7e5e62e4UL, 0x99ea08a3UL, 0x3ce3234eUL, 0xccd7cb79UL, 0x6743a148UL,
+0x199614a3UL, 0xd54bc9feUL, 0x4a1714a1UL, 0x6618a0eaUL, 0x2ddb84a0UL, 0x6f48a809UL, 0x4a6188a8UL, 0x98af0029UL,
+0x91596601UL, 0x632899e1UL, 0x600cf3c8UL, 0x3cef782eUL, 0x3219d5d0UL, 0x14ec0fcfUL, 0xd207caf7UL, 0x7220a8d0UL,
+0x7e1941fdUL, 0xb0a60593UL, 0xdae36be8UL, 0xcdd3be74UL, 0x3ca52d37UL, 0x48447f4cUL, 0x40d4b5daUL, 0xc30eba6dUL,
+0xa7193908UL, 0xd9eeba9fUL, 0xb0cfdb49UL, 0x530c674eUL, 0x019c3d5cUL, 0x41b9bd64UL, 0x6a630e2cUL, 0xcdd97dbaUL,
+0x88736feaUL, 0x62c70be7UL, 0xdb9af235UL, 0x8ddd4c5cUL, 0x8c8dd4f0UL, 0xe25381b8UL, 0x6698a108UL, 0xc8eae21aUL,
+0x89af4c28UL, 0x238292aaUL, 0x53be3493UL, 0xbf213a3bUL, 0xe34b4316UL, 0x0639ea9aUL, 0x6ec3e8efUL, 0xd9cd90f8UL,
+0xae6d2280UL, 0xa3a440c3UL, 0x099c7edfUL, 0x07a894a6UL, 0xcc5e7c5bUL, 0xa6b31d22UL, 0x2fa0699aUL, 0x548a8168UL,
+0x6f29b2ceUL, 0x3a84c053UL, 0x553689feUL, 0x8ae6bf25UL, 0xbc8a62b4UL, 0xbf2e22cfUL, 0x486fac25UL, 0x8793a9a9UL,
+0x65dbbd53UL, 0xe7fb6fe7UL, 0x78fd67e9UL, 0x6335a90bUL, 0xc12b348eUL, 0xe91ba1e8UL, 0x0d748049UL, 0xfc7d08c8UL,
+0x99bfe48dUL, 0xa00111a1UL, 0x7579d37fUL, 0xc0265adaUL, 0x4f991fe8UL, 0x89cd2895UL, 0xed9f33fdUL, 0xbf3478b8UL,
+0x6d45045fUL, 0x98862522UL, 0x3bc8c4c9UL, 0xbe56c12dUL, 0xaa8d624fUL, 0xc55ef557UL, 0xbe0a22e2UL, 0xbf6e91d2UL,
+0x955bc74eUL, 0xc0c3f224UL, 0x995dd142UL, 0xa07f0dcdUL, 0xff276e7bUL, 0xf08adca8UL, 0x06c14573UL, 0x2f231ef4UL,
+0x86231635UL, 0x2689eae6UL, 0x94b03333UL, 0xf2c67e15UL, 0xaf742b37UL, 0xe4732569UL, 0x48d8a9e9UL, 0x890216f3UL,
+0x1def623aUL, 0x38e287a7UL, 0x76f6a5f3UL, 0x53483674UL, 0x63109520UL, 0x8d697645UL, 0x07d4fab6UL, 0x50f92a59UL,
+0x2335f736UL, 0x876efb4cUL, 0xc0cea47dUL, 0xaa2d156cUL, 0xa89603cbUL, 0x5dfe0dc5UL, 0xab07d7fcUL, 0x2fc42109UL,
+0xbbf0df89UL, 0x78bee25fUL, 0x334f8f44UL, 0xc9134675UL, 0x8dd0052bUL, 0x85d5b948UL, 0x419404dcUL, 0x9b8f09c8UL,
+0x86e7ed7dUL, 0x73339ac3UL, 0x05004142UL, 0x5117096aUL, 0xa6c8f30eUL, 0xd6720089UL, 0x82762028UL, 0xbef7a9a9UL,
+0x9d6732bfUL, 0x755b5bd4UL, 0x00fd53b3UL, 0x58e3b0cbUL, 0x0a220f83UL, 0x14b28f1fUL, 0x08cf72d3UL, 0x134a3cccUL,
+0x6631f68cUL, 0xbe871c06UL, 0x888fc988UL, 0x97e36260UL, 0x7a8ecf47UL, 0x8352c8b6UL, 0xfbacc23cUL, 0x7669c03fUL,
+0x52028f4eUL, 0x4d31d864UL, 0xe37038daUL, 0x5954661eUL, 0xf00809c1UL, 0xa5213051UL, 0xb7685b6cUL, 0xa08a2f82UL,
+0x3ecd0730UL, 0xef9e7174UL, 0x812687dcUL, 0xd4403307UL, 0xd92f437eUL, 0x41c25e0cUL, 0x6c280988UL, 0x91d892f5UL,
+0xf630a908UL, 0x05f37e95UL, 0xbdfffbb7UL, 0x6fe966c2UL, 0x98ace46fUL, 0xc0ec73b1UL, 0x2ab460bcUL, 0xda983495UL,
+0x12aea1fbUL, 0x36d74b2dUL, 0xabfa250fUL, 0xebfcf3a4UL, 0x239196e2UL, 0x3d0c7f25UL, 0x49af4893UL, 0xbc001436UL,
+0x4a6f81e8UL, 0x00f21438UL, 0x4340f9a3UL, 0xc2547a9cUL, 0x574f70bcUL, 0xf9e741daUL, 0x3ad35ac2UL, 0x84a0f454UL,
+0x05557fb1UL, 0xbe7c3559UL, 0xc815bdedUL, 0xabc5977fUL, 0xb5c75abaUL, 0xafdef6b6UL, 0x3a9c473aUL, 0x25da0253UL,
+0x6a7e3d65UL, 0x498d2654UL, 0xea77a451UL, 0x5bd51750UL, 0x885dd2d7UL, 0x766c1344UL, 0xc8a80404UL, 0x21a1e5b8UL,
+0x8a921ab8UL, 0x6958ed60UL, 0x965bc597UL, 0x1b99eceaUL, 0x13599329UL, 0xf1b7fd01UL, 0xfa8d8e08UL, 0xf5f6b69aUL,
+0x9fbf4c3bUL, 0xabe35d4aUL, 0x351d05e6UL, 0x55d8e1a0UL, 0xf14c6bd3UL, 0xebed44f5UL, 0x2435e9b0UL, 0xbd8fbbbeUL,
+0xcf62d7a2UL, 0x542fc949UL, 0x31f3b538UL, 0x54a42871UL, 0x05293948UL, 0xb81d5ba6UL, 0xbd971c85UL, 0x2fcf75d6UL};
+
+
+const uint32_t s7[] PROGMEM = { /* CAST5 S-box S7, 256 entries, words byte-swapped relative to the BIG_ENDIAN variant; const is required for PROGMEM data by current avr-gcc */
+0x1940e085UL, 0x67f52b33UL, 0xffbf2d66UL, 0x9356c6cfUL, 0x6f7f8d2aUL, 0x12c99babUL, 0xa10860deUL, 0x1fda2820UL,
+0xe7bc2702UL, 0x1629644dUL, 0x00c3fa18UL, 0x828bf150UL, 0x11cbb22cUL, 0x5ce732b2UL, 0xf295364bUL, 0xde0787b2UL,
+0xf6bc5fa0UL, 0xe98141cdUL, 0x0c2150e1UL, 0xbdf14ee2UL, 0x81c368b1UL, 0x89e7e4fdUL, 0xd8b0795cUL, 0x43fd8b1eUL,
+0x0150494dUL, 0x4143be38UL, 0x1dee3c91UL, 0x3f9ca792UL, 0xbe669708UL, 0xf4adeebaUL, 0xcfbe8612UL, 0x19cbeab6UL,
+0x00c26026UL, 0xe4bd6575UL, 0x7a1f2464UL, 0xa9dc4882UL, 0x66adb3c3UL, 0x86601328UL, 0xa8dfd80bUL, 0xf21c6d35UL,
+0xbe897710UL, 0xcee9b2b3UL, 0x8faa0205UL, 0x1e35c00bUL, 0x2af56b16UL, 0x82ff12ebUL, 0x116948e3UL, 0x16754dd3UL,
+0xff3a7b4eUL, 0x1b67435fUL, 0x37e0f69cUL, 0x83ac8149UL, 0xce664233UL, 0xb741938cUL, 0xc054d8d0UL, 0x886c3acbUL,
+0x2928bc47UL, 0x37ba2547UL, 0x2bd26aa6UL, 0x1e1fd67aUL, 0xfaba5c0cUL, 0x07f13744UL, 0x6299e7b6UL, 0x16d8d242UL,
+0x8812960aUL, 0x6ec0a5e1UL, 0x679e7413UL, 0x1a08fc72UL, 0xf739d1b1UL, 0x453758f9UL, 0x58df19cfUL, 0x56f7c3beUL,
+0x30ba6ec0UL, 0x241b2107UL, 0x2988c245UL, 0x7f315ec9UL, 0x11c58ebcUL, 0xe946bc38UL, 0x14fae6c6UL, 0x4a58e8baUL,
+0x46bc4eadUL, 0x8b508f46UL, 0x5f432978UL, 0x3b1824f1UL, 0x9fba1d82UL, 0xf40ff6afUL, 0x6d4e2ceaUL, 0x6492e316UL,
+0x8b4a5492UL, 0xc34f9b00UL, 0xed8ca6abUL, 0x786fc99aUL, 0x9ab7a506UL, 0x6e6e85b2UL, 0xa93cec1aUL, 0x888683beUL,
+0xe904080eUL, 0x56bef155UL, 0x3b36e5e7UL, 0x5df2a1b3UL, 0x85bbdef7UL, 0x3c03fe61UL, 0x33627416UL, 0x284c033cUL,
+0x740c6ddaUL, 0x6cc5aa79UL, 0xade1e43cUL, 0x02c8f051UL, 0x5af3f898UL, 0x9fa42616UL, 0x292bd8eeUL, 0xe32f381dUL,
+0x9ab94f0cUL, 0x785732bbUL, 0x7bd9c63eUL, 0xa9a6776eUL, 0x5c8b65cbUL, 0xc73052d4UL, 0x8b40d12bUL, 0xb73ec060UL,
+0x788d06b9UL, 0xf45437a3UL, 0x7dc830f4UL, 0x0213a7c8UL, 0x328c6db9UL, 0xbee7d4ebUL, 0x2d9d8bbeUL, 0x06fb7979UL,
+0x085322e7UL, 0x77cf758bUL, 0xa48def11UL, 0x58c883e0UL, 0x6f786b8dUL, 0xa617635aUL, 0xa0f75cfaUL, 0x3300da5dUL,
+0xb0bf8ef2UL, 0x10c3b9f5UL, 0x80c2eaa0UL, 0x7a76b908UL, 0xb0d2d9a3UL, 0x1742d379UL, 0x8d711a02UL, 0x6a33c69aUL,
+0x60fd1127UL, 0xe3508043UL, 0xa8089906UL, 0xc4ed7f3dUL, 0xef2b6d82UL, 0x7684eb4eUL, 0x25cf8d48UL, 0x66d5c936UL,
+0x414ee728UL, 0xca0a61c2UL, 0xcfa9493dUL, 0xdfb9e3baUL, 0xe68d5fb6UL, 0x64afae92UL, 0xe6d5c73aUL, 0x0905a89eUL,
+0x7d012bf2UL, 0x703f17a4UL, 0xc3161eddUL, 0xf9d7e015UL, 0x87b8b150UL, 0xd54f9f2bUL, 0x82ba5a62UL, 0x6279016aUL,
+0x9c1bc02eUL, 0xa98a4815UL, 0x40e716d7UL, 0x2c5a0540UL, 0x229ad293UL, 0x9abf2de3UL, 0xb9458705UL, 0x1edc5334UL,
+0x6e2999d6UL, 0x6fff6c49UL, 0x86499f1cUL, 0x07ede2dfUL, 0xd14272b8UL, 0xae7ede19UL, 0x1a563e05UL, 0x8c6fad15UL,
+0x1c6c6266UL, 0x4cc25471UL, 0x2a2b08eaUL, 0x3929eb93UL, 0xf0b0dc17UL, 0xaef2d458UL, 0xfb94a29eUL, 0x4c56cf52UL,
+0x66fe8398UL, 0x8105c42eUL, 0xc3533976UL, 0x2e69d601UL, 0x08c1a0d3UL, 0x0e16e7a1UL, 0xa6dff2e4UL, 0x85d23e69UL,
+0x98469074UL, 0xdd0e2b4cUL, 0x5676754fUL, 0x7833395dUL, 0x4f2332a1UL, 0x5d1c323dUL, 0x94e1f5c3UL, 0x0193264bUL,
+0x2f029fc7UL, 0x7e7e993cUL, 0x04954f5eUL, 0xbdfbfa3fUL, 0x0eadf776UL, 0xf4936629UL, 0x6fce1f3dUL, 0xbe451ec6UL,
+0x34abb5d3UL, 0xb7f92bf7UL, 0xc034041bUL, 0x67b5724eUL, 0x3da39255UL, 0x019322b5UL, 0x7fa8d2cfUL, 0x67b7ae60UL,
+0x6b381418UL, 0x3dc3bc30UL, 0x7dc0a038UL, 0xf20616fdUL, 0x9b5163c3UL, 0x90d39d58UL, 0xe6f87954UL, 0x47d6b81cUL,
+0xa961fd97UL, 0xf45977eaUL, 0x9d53572dUL, 0xcf589a56UL, 0xad634ee8UL, 0x781b2e46UL, 0x7ef88065UL, 0x147981f3UL,
+0xf455da91UL, 0xf330a240UL, 0x358f98d1UL, 0xd218e3b6UL, 0xbc50fa3fUL, 0x21f0403dUL, 0xaebdc0c3UL, 0x4cc25849UL,
+0xb2368f51UL, 0x70d3b184UL, 0x83ceed0fUL, 0xdada8d87UL, 0xc779a2f2UL, 0xe81be094UL, 0x4b6f7190UL, 0xa38a4b95UL};
+
+
+const uint32_t s8[] PROGMEM = { /* CAST5 S-box S8, 256 entries, words byte-swapped relative to the BIG_ENDIAN variant; const is required for PROGMEM data by current avr-gcc */
+0x0d3016e2UL, 0xfcffddbbUL, 0xbddaeba7UL, 0x95806435UL, 0xb7f88977UL, 0x1b12c1e6UL, 0x0016240eUL, 0xb5e82c05UL,
+0xb0cfa911UL, 0x112f95e5UL, 0x0a99e7ecUL, 0x74d18693UL, 0x1c93422aUL, 0x1181e376UL, 0x3aef2db1UL, 0xfcdddd37UL,
+0xb1de9adeUL, 0x2cc30c0aUL, 0x297019beUL, 0x4009a084UL, 0x0f3a24bbUL, 0xcf37d1b4UL, 0xf0794eb4UL, 0xfded9e04UL,
+0x5da1150bUL, 0x68310d48UL, 0x5adebb8bUL, 0x42ed9d66UL, 0x31e8ecc7UL, 0xe7958f3fUL, 0x1b19df72UL, 0x0d338075UL,
+0x51420794UL, 0xfacd7d5cUL, 0x636dbeabUL, 0x642140aaUL, 0x0ad401b3UL, 0xcad1e702UL, 0xae1d5753UL, 0xa282317aUL,
+0xecdda812UL, 0x5d33aafdUL, 0xe8436f17UL, 0xd446fb71UL, 0x22901238UL, 0xd49a94ceUL, 0xad6947b8UL, 0x62d85b96UL,
+0x55d0f382UL, 0x6797fb66UL, 0x4e0bb815UL, 0xa0475b1dUL, 0x6fe0fd4cUL, 0xb8c48ec2UL, 0x6e72e857UL, 0xfc787a64UL,
+0x445d8699UL, 0x93d58b60UL, 0x030e206cUL, 0xf65fdc39UL, 0xa3000b5dUL, 0xf2af63aeUL, 0x32d68b7eUL, 0x0c8c1070UL,
+0x4950d3bbUL, 0x04df9829UL, 0x2af40c98UL, 0x91f46d9bUL, 0x53dd7e9eUL, 0x48859106UL, 0x077ecb58UL, 0x2eef743bUL,
+0xb1ff2f52UL, 0xcc0847d2UL, 0xcd277e1cUL, 0x5b21eba4UL, 0xe2d2f13cUL, 0x387ab419UL, 0x18764f42UL, 0x39608535UL,
+0xe7de179dUL, 0xe635eb27UL, 0x7bf6afc9UL, 0xb8f5ba36UL, 0xcd67c409UL, 0xb11089c1UL, 0x7bbf1de1UL, 0xf81acd06UL,
+0x08c67071UL, 0x54335e2dUL, 0x5a49ded4UL, 0x06d0c664UL, 0x2cc6c0bcUL, 0xb30dd03dUL, 0x348f8f70UL, 0x421bd577UL,
+0x0f624f26UL, 0xbfd2b824UL, 0x9eb7c115UL, 0x6425a546UL, 0x4ee5d7f8UL, 0x6081373eUL, 0xa5cd9578UL, 0xa5159c85UL,
+0x889745e6UL, 0x5fc77bc3UL, 0x0cba07dbUL, 0xaba37606UL, 0x1e9b227fUL, 0x7b2e8431UL, 0xd79f2524UL, 0x72f4bef8UL,
+0xb8fc5f83UL, 0xf2c1f46dUL, 0x95b1f596UL, 0xfcf00afdUL, 0x4c13feb0UL, 0x3d6d50e2UL, 0xea129b4fUL, 0x25f215f2UL,
+0x6f7323a2UL, 0x28c4b49fUL, 0x7949d025UL, 0xf813c734UL, 0x878161c4UL, 0x986e7aeaUL, 0xfc6ed17cUL, 0x6c873614UL,
+0x074154f1UL, 0x14eedebeUL, 0x27afe956UL, 0x41a44aa0UL, 0x99c8f73cUL, 0xe6baec92UL, 0x6d0167ddUL, 0xeb821615UL,
+0xdfee42a8UL, 0xb460bafdUL, 0x757b90f1UL, 0x0f03e320UL, 0x9ec2d824UL, 0x3b6739e1UL, 0xb83fa6efUL, 0x54308771UL,
+0x3bcff2b6UL, 0x4264329fUL, 0xcca415cbUL, 0x04451ab0UL, 0x8d7de4f1UL, 0xe51b4a84UL, 0xdcdfe7baUL, 0x70dacb42UL,
+0x0aae7dcdUL, 0x7a5be857UL, 0xf65a3fd5UL, 0x8c4dcf20UL, 0x28d4a4ceUL, 0xa430d179UL, 0xfbeb8634UL, 0xdccdd333UL,
+0x533b8577UL, 0xb5fcef37UL, 0x788706c5UL, 0xe6b380e5UL, 0xf4b8684eUL, 0x7eb3c8c5UL, 0xa29e800dUL, 0x7ceb8f39UL,
+0x944f2a13UL, 0x0e95b743UL, 0x1c7dee2fUL, 0xbd133622UL, 0xa2ca06ddUL, 0x2b93df37UL, 0x898224c4UL, 0xc3ebf3acUL,
+0xb7f61557UL, 0xdd7834efUL, 0x6f6167f2UL, 0xe4cb48c1UL, 0x5e815290UL, 0xab0f415eUL, 0x65248ab4UL, 0xa47fda2eUL,
+0xe4407be8UL, 0x84a08ee9UL, 0xe1e98958UL, 0xfc90d3efUL, 0x5bd307ddUL, 0x945648dbUL, 0xb2e5d738UL, 0x01017257UL,
+0xbcde0e73UL, 0x1331645bUL, 0x4f7e9194UL, 0xba2f3c50UL, 0x82126f64UL, 0x4ad22375UL, 0x959677e0UL, 0x8f7ac1f9UL,
+0x21215b7aUL, 0x96b887d1UL, 0x4d3a2629UL, 0xdf0c51baUL, 0x9f7cf481UL, 0xed6311adUL, 0x65597beaUL, 0x6e72001aUL,
+0x92304011UL, 0x776dda00UL, 0x61dd0c4aUL, 0x03461fadUL, 0xb0df5b60UL, 0x64c3ed9eUL, 0xa8e6eb22UL, 0x8ad2e7ceUL,
+0xa036e7a0UL, 0xb9a66455UL, 0x09328510UL, 0x378febc7UL, 0xca05e72dUL, 0x0f575189UL, 0x2b8209dfUL, 0x6c1a69bdUL,
+0xf2e412aaUL, 0x0f1c4587UL, 0x7aa2f6e0UL, 0x1948da3aUL, 0x4f76f14cUL, 0x2b1c770dUL, 0x56b1cd67UL, 0x84830d35UL,
+0x0ffa3859UL, 0xf39e3942UL, 0x077b9936UL, 0x3d09840eUL, 0x613ea94aUL, 0x7bd86083UL, 0x0c8ba91fUL, 0x2c384911UL,
+0xa52576e9UL, 0xb7d11406UL, 0x4b24250eUL, 0x4783760cUL, 0x828d9e58UL, 0xd159200dUL, 0x1ebb66a4UL, 0x820adaf8UL,
+0x3091f104UL, 0xc04e6ebaUL, 0x64512699UL, 0x0d23e71eUL, 0x80adb250UL, 0x0168eeeaUL, 0x83a2b28dUL, 0x9ef58beaUL};
+
+
+#endif 
+
+
+#endif
+
diff --git a/cast5/cast5.c b/cast5/cast5.c
new file mode 100644
index 0000000..6d7f42f
--- /dev/null
+++ b/cast5/cast5.c
@@ -0,0 +1,343 @@
+/* cast5.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * \file	cast5.c
+ * \author	Daniel Otte
+ * \email       daniel.otte@rub.de
+ * \date 	2006-07-26
+ * \par License:
+ *  GPLv3 or later
+ * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
+ * 
+ */
+ 
+ #include <stdint.h>
+ #include <string.h>
+ #include "cast5.h"
+ #include "config.h"
+ #include "debug.h"
+ 
+ #undef DEBUG
+ 
+ #ifdef DEBUG
+  #include "cli.h"
+ #endif
+ 
+#include "cast5-sbox.h"
+
+
+/* S5..S8: read the 32-bit entry x of table s5..s8 with pgm_read_dword */
+#define S5(x) pgm_read_dword(&s5[(x)])
+#define S6(x) pgm_read_dword(&s6[(x)])
+#define S7(x) pgm_read_dword(&s7[(x)])
+#define S8(x) pgm_read_dword(&s8[(x)])
+/* cast5_init_A: fill dest[0..15] from src[0..15]; cast5_init calls it for its "A" (bmode false) and "B" (bmode true) steps */
+static 
+void cast5_init_A(uint8_t *dest, uint8_t *src, bool bmode){
+	uint8_t mask = bmode?0x8:0; /* bmode XORs every src index below with 8 */
+	*((uint32_t*)(&dest[0x0])) = *((uint32_t*)(&src[0x0^mask]))
+                                     ^ S5(src[0xD^mask]) ^ S6(src[0xF^mask]) 
+                                     ^ S7(src[0xC^mask]) ^ S8(src[0xE^mask]) 
+                                     ^ S7(src[0x8^mask]);
+	*((uint32_t*)(&dest[0x4])) = *((uint32_t*)(&src[0x8^mask])) /* from here on dest bytes written above serve as S-box indices, so the order matters */
+                                     ^ S5(dest[0x0]) ^ S6(dest[0x2]) 
+                                     ^ S7(dest[0x1]) ^ S8(dest[0x3]) 
+                                     ^ S8(src[0xA^mask]);
+	*((uint32_t*)(&dest[0x8])) = *((uint32_t*)(&src[0xC^mask])) 
+                                     ^ S5(dest[0x7]) ^ S6(dest[0x6]) 
+                                     ^ S7(dest[0x5]) ^ S8(dest[0x4]) 
+                                     ^ S5(src[0x9^mask]);
+	*((uint32_t*)(&dest[0xC])) = *((uint32_t*)(&src[0x4^mask])) 
+                                     ^ S5(dest[0xA]) 
+                                     ^ S6(dest[0x9]) 
+                                     ^ S7(dest[0xB]) 
+                                     ^ S8(dest[0x8]) 
+                                     ^ S6(src[0xB^mask]);
+}
+/* cast5_init_M: write four 32-bit words to dest[0..15], each the XOR of five S-box entries indexed by bytes of src */
+static
+void cast5_init_M(uint8_t *dest, uint8_t *src, bool nmode, bool xmode){
+	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 
+                         0xF, 0xE, 0xD, 0xC, 
+                         0x3, 0x2, 0x1, 0x0, 
+                         0x7, 0x6, 0x5, 0x4}; /* nmode table */
+	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, /* src index of the fifth term of each word; row = (xmode<<1)+nmode */
+                             {0x8, 0xD, 0x3, 0x7}, 
+                             {0x3, 0x7, 0x8, 0xD}, 
+                             {0x9, 0xC, 0x2, 0x6}};
+	#define NMT(x) (src[nmode?nmt[(x)]:(x)]) /* src byte x, remapped through nmt when nmode is set */
+	#define XMT(x) (src[xmt[(xmode<<1) + nmode][(x)]]) /* both macros stay defined and are reused by cast5_init_rM */
+	*((uint32_t*)(&dest[0x0])) = S5(NMT(0x8)) ^ S6(NMT(0x9)) ^ S7(NMT(0x7)) ^ S8(NMT(0x6)) ^ S5(XMT(0));
+	*((uint32_t*)(&dest[0x4])) = S5(NMT(0xA)) ^ S6(NMT(0xB)) ^ S7(NMT(0x5)) ^ S8(NMT(0x4)) ^ S6(XMT(1));
+	*((uint32_t*)(&dest[0x8])) = S5(NMT(0xC)) ^ S6(NMT(0xD)) ^ S7(NMT(0x3)) ^ S8(NMT(0x2)) ^ S7(XMT(2));
+	*((uint32_t*)(&dest[0xC])) = S5(NMT(0xE)) ^ S6(NMT(0xF)) ^ S7(NMT(0x1)) ^ S8(NMT(0x0)) ^ S8(XMT(3));
+}
+/* S5B..S8B: fetch only the byte at offset 3 of entry x of table s5..s8 (pgm_read_byte) */
+#define S5B(x) pgm_read_byte(3+(uint8_t*)(&s5[(x)]))
+#define S6B(x) pgm_read_byte(3+(uint8_t*)(&s6[(x)]))
+#define S7B(x) pgm_read_byte(3+(uint8_t*)(&s7[(x)]))
+#define S8B(x) pgm_read_byte(3+(uint8_t*)(&s8[(x)]))
+/* cast5_init_rM: compute four values t and OR bits 0..3 of each into klo[] and bit 4 of each into khi[]; targets must be zeroed beforehand */
+static
+void cast5_init_rM(uint8_t *klo, uint8_t *khi, uint8_t offset, uint8_t *src, bool nmode, bool xmode){
+	uint8_t nmt[] = {0xB, 0xA, 0x9, 0x8, 0xF, 0xE, 0xD, 0xC, 0x3, 0x2, 0x1, 0x0, 0x7, 0x6, 0x5, 0x4}; /* nmode table */
+	uint8_t xmt[4][4] = {{0x2, 0x6, 0x9, 0xC}, {0x8, 0xD, 0x3, 0x7}, {0x3, 0x7, 0x8, 0xD}, {0x9, 0xC, 0x2, 0x6}};
+	uint8_t t, h=0; /* h collects bit 4 of the four values; NMT/XMT below are the macros defined in cast5_init_M */
+	t = S5B(NMT(0x8)) ^ S6B(NMT(0x9)) ^ S7B(NMT(0x7)) ^ S8B(NMT(0x6)) ^ S5B(XMT(0));
+		klo[offset*2] |= (t & 0x0f); /* first value: low nibble of klo[offset*2] */
+		h |= (t&0x10); h>>=1; /* after all four shifts the first value's bit ends up in bit 0 of h */
+	t = S5B(NMT(0xA)) ^ S6B(NMT(0xB)) ^ S7B(NMT(0x5)) ^ S8B(NMT(0x4)) ^ S6B(XMT(1));
+		klo[offset*2] |= (t<<4) & 0xf0; /* second value: high nibble of the same byte */
+		h |= t&0x10; h>>=1;
+	t = S5B(NMT(0xC)) ^ S6B(NMT(0xD)) ^ S7B(NMT(0x3)) ^ S8B(NMT(0x2)) ^ S7B(XMT(2));
+		klo[offset*2+1] |= t&0xf; /* third value: low nibble of klo[offset*2+1] */
+		h |= t&0x10; h>>=1;
+	t = S5B(NMT(0xE)) ^ S6B(NMT(0xF)) ^ S7B(NMT(0x1)) ^ S8B(NMT(0x0)) ^ S8B(XMT(3));
+		klo[offset*2+1] |= t<<4; /* fourth value: high nibble; the store to uint8_t drops the upper bits */
+		h |= t&0x10; h >>=1;
+	#ifdef DEBUG
+		cli_putstr("\r\n\t h="); cli_hexdump(&h,1);
+	#endif
+	khi[offset>>1] |= h<<((offset&0x1)?4:0); /* two offsets share one khi byte: even offset low nibble, odd offset high nibble */
+}
+/* S_nX / S_nZ: entry of table sn indexed by byte s of BPX / BPZ (macros that cast5_init defines); they appear unused in this file */
+#define S_5X(s) pgm_read_dword(&s5[BPX[(s)]])
+#define S_6X(s) pgm_read_dword(&s6[BPX[(s)]])
+#define S_7X(s) pgm_read_dword(&s7[BPX[(s)]])
+#define S_8X(s) pgm_read_dword(&s8[BPX[(s)]])
+
+#define S_5Z(s) pgm_read_dword(&s5[BPZ[(s)]])
+#define S_6Z(s) pgm_read_dword(&s6[BPZ[(s)]])
+#define S_7Z(s) pgm_read_dword(&s7[BPZ[(s)]])
+#define S_8Z(s) pgm_read_dword(&s8[BPZ[(s)]])
+
+
+
+/* cast5_init: build the key schedule in *s from keylength_b bits at key (lengths above 128 are treated as 128; x is zero padded to 16 bytes) */
+void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s){
+ 	 /* we might return if the key is valid and if setup was successful */
+	uint32_t x[4], z[4];
+	#define BPX ((uint8_t*)&(x[0]))
+	#define BPZ ((uint8_t*)&(z[0]))
+	s->shortkey = (keylength_b<=80); /* cast5_enc/cast5_dec run 12 instead of 16 rounds when this is set */
+	/* little endian only! */
+	memset(&(x[0]), 0 ,16); /* set x to zero */
+	if(keylength_b > 128)
+		keylength_b=128;
+	memcpy(&(x[0]), key, (keylength_b+7)/8);
+	
+
+	/* todo: merge a and b and compress the whole stuff */
+	/***** A *****/
+	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);	
+	/***** M *****/
+	cast5_init_M((uint8_t*)(&(s->mask[0])), (uint8_t*)(&z[0]), false, false);
+	/***** B *****/
+	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
+	/***** N *****/
+	cast5_init_M((uint8_t*)(&(s->mask[4])), (uint8_t*)(&x[0]), true, false);
+	/***** A *****/
+	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
+	/***** N' *****/
+	cast5_init_M((uint8_t*)(&(s->mask[8])), (uint8_t*)(&z[0]), true, true);
+	/***** B *****/
+	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
+	/***** M' *****/
+	cast5_init_M((uint8_t*)(&(s->mask[12])), (uint8_t*)(&x[0]), false, true);
+	
+	/* those were the masking keys, now the rotation keys (x and z carry over from the steps above) */
+	/* set the keys to zero, because cast5_init_rM only ORs bits in */
+	memset(&(s->rotl[0]),0,8);
+	s->roth[0]=s->roth[1]=0;
+	/***** A *****/
+	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
+	/***** M *****/
+	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 0, (uint8_t*)(&z[0]), false, false);
+	/***** B *****/
+	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
+	/***** N *****/
+	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 1, (uint8_t*)(&x[0]), true, false);
+	/***** A *****/
+	cast5_init_A((uint8_t*)(&z[0]), (uint8_t*)(&x[0]), false);
+	/***** N' *****/
+	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 2, (uint8_t*)(&z[0]), true, true);
+	/***** B *****/
+	cast5_init_A((uint8_t*)(&x[0]), (uint8_t*)(&z[0]), true);
+	/***** M' *****/
+	cast5_init_rM(&(s->rotl[0]), &(s->roth[0]), 3, (uint8_t*)(&x[0]), false, true);
+	/* done ;-) */
+}
+
+
+
+/********************************************************************************************************/
+
+#define ROTL32(a,n) ((a)<<(n) | (a)>>(32-(n)))
+#define CHANGE_ENDIAN32(x) ((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8 )
+
+typedef uint32_t cast5_f_t(uint32_t,uint32_t,uint8_t);
+
+#define IA 3
+#define IB 2
+#define IC 1
+#define ID 0
+/*
+ * cast5_f1: first of the three round functions.
+ *  d - data word
+ *  m - masking key, r - rotation amount in bits
+ * I is (d + m) rotated left by r.  The four bytes of I selected by
+ * IA..ID index s1..s4 and the entries are combined as
+ * ((s1 ^ s2) - s3) + s4.
+ */
+static
+uint32_t cast5_f1(uint32_t d, uint32_t m, uint8_t r){
+	uint32_t rotated = ROTL32((d + m),r);
+	const uint8_t *sel = (const uint8_t*)&rotated;	/* byte view of I */
+	uint32_t ia = pgm_read_dword(&s1[sel[IA]]);
+	uint32_t ib = pgm_read_dword(&s2[sel[IB]]);
+	uint32_t ic = pgm_read_dword(&s3[sel[IC]]);
+	uint32_t id = pgm_read_dword(&s4[sel[ID]]);
+
+#ifdef DEBUG
+	/* trace the inputs, I and the four table entries */
+	cli_putstr("\r\n f1("); cli_hexdump(&d, 4); cli_putc(',');
+	cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
+	cli_hexdump(&rotated, 4);
+	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
+	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
+	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
+	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
+#endif
+
+	return ((ia ^ ib) - ic) + id;
+}
+/*
+ * cast5_f2: second of the three round functions.
+ *  d - data word
+ *  m - masking key
+ *  r - rotation amount in bits
+ * I is (d ^ m) rotated left by r.  The four bytes of I selected by
+ * IA..ID index s1..s4 and the entries are combined as
+ * ((s1 - s2) + s3) ^ s4.
+ */
+static
+uint32_t cast5_f2(uint32_t d, uint32_t m, uint8_t r){
+	uint32_t rotated = ROTL32((d ^ m),r);
+	const uint8_t *sel = (const uint8_t*)&rotated;	/* byte view of I */
+	uint32_t ia = pgm_read_dword(&s1[sel[IA]]);
+	uint32_t ib = pgm_read_dword(&s2[sel[IB]]);
+	uint32_t ic = pgm_read_dword(&s3[sel[IC]]);
+	uint32_t id = pgm_read_dword(&s4[sel[ID]]);
+
+#ifdef DEBUG
+	/* trace the inputs, I and the four table entries */
+	cli_putstr("\r\n f2("); cli_hexdump(&d, 4); cli_putc(',');
+	cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
+	cli_hexdump(&rotated, 4);
+	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
+	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
+	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
+	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
+#endif
+
+	return ((ia - ib) + ic) ^ id;
+}
+/*
+ * cast5_f3: third of the three round functions.
+ *  d - data word
+ *  m - masking key
+ *  r - rotation amount in bits
+ * I is (m - d) rotated left by r.  The four bytes of I selected by
+ * IA..ID index s1..s4 and the entries are combined as
+ * ((s1 + s2) ^ s3) - s4.
+ */
+static
+uint32_t cast5_f3(uint32_t d, uint32_t m, uint8_t r){
+	uint32_t rotated = ROTL32((m - d),r);
+	const uint8_t *sel = (const uint8_t*)&rotated;	/* byte view of I */
+	uint32_t ia = pgm_read_dword(&s1[sel[IA]]);
+	uint32_t ib = pgm_read_dword(&s2[sel[IB]]);
+	uint32_t ic = pgm_read_dword(&s3[sel[IC]]);
+	uint32_t id = pgm_read_dword(&s4[sel[ID]]);
+
+#ifdef DEBUG
+	/* trace the inputs, I and the four table entries */
+	cli_putstr("\r\n f3("); cli_hexdump(&d, 4); cli_putc(',');
+	cli_hexdump(&m , 4); cli_putc(','); cli_hexdump(&r, 1);cli_putstr("): I=");
+	cli_hexdump(&rotated, 4);
+	cli_putstr("\r\n\tIA="); cli_hexdump(&ia, 4);
+	cli_putstr("\r\n\tIB="); cli_hexdump(&ib, 4);
+	cli_putstr("\r\n\tIC="); cli_hexdump(&ic, 4);
+	cli_putstr("\r\n\tID="); cli_hexdump(&id, 4);
+#endif
+
+	return ((ia + ib) ^ ic) - id;
+}
+
+/******************************************************************************/
+/* cast5_enc: encrypt the 8 bytes at block in place; 12 rounds when s->shortkey is set, otherwise 16 */
+void cast5_enc(void* block, const cast5_ctx_t *s){
+	cast5_f_t* round_fn[]={cast5_f1,cast5_f2,cast5_f3};
+	uint32_t *half = (uint32_t*)block;
+	uint32_t left = half[0];
+	uint32_t right = half[1];
+	const uint8_t rounds = s->shortkey?12:16;
+	uint8_t i;
+
+	for (i=0; i<rounds; ++i){
+		/* 5-bit rotation key of round i: bit 4 comes from roth, bits 3..0 from a nibble of rotl */
+		uint8_t rot = (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
+		              + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f);
+		uint32_t y = (round_fn[i%3])(CHANGE_ENDIAN32(right), CHANGE_ENDIAN32(s->mask[i]), rot);
+		uint32_t mixed = left ^ CHANGE_ENDIAN32(y);
+
+		left = right;	/* the halves trade places after every round */
+		right = mixed;
+	}
+	half[0]=right;
+	half[1]=left;
+}
+
+/******************************************************************************/
+/* cast5_dec: decrypt the 8 bytes at block in place; same round as cast5_enc with the keys applied last to first */
+void cast5_dec(void* block, const cast5_ctx_t *s){
+	cast5_f_t* round_fn[]={cast5_f1,cast5_f2,cast5_f3};
+	uint32_t *half = (uint32_t*)block;
+	uint32_t left = half[0], right = half[1];
+	uint8_t i = s->shortkey?12:16;
+
+	while (i-- > 0){	/* the body sees i = rounds-1 down to 0 */
+		uint8_t rot = (((s->roth[i>>3]) & (1<<(i&0x7)))?0x10:0x00)
+		              + ( ((s->rotl[i>>1])>>((i&1)?4:0)) & 0x0f);
+		uint32_t y = (round_fn[i%3])(CHANGE_ENDIAN32(right), CHANGE_ENDIAN32(s->mask[i]), rot);
+		uint32_t mixed = left ^ CHANGE_ENDIAN32(y);
+
+		left = right;
+		right = mixed;
+	}
+	half[0]=right;
+	half[1]=left;
+}
+
+
+/******************************************************************************/
+
+
+
+
diff --git a/cast5/cast5.h b/cast5/cast5.h
new file mode 100644
index 0000000..b014f7c
--- /dev/null
+++ b/cast5/cast5.h
@@ -0,0 +1,95 @@
+/* cast5.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** 
+ * \file	cast5.h
+ * \author	Daniel Otte
+ * \date 	2006-07-26
+ * \license GPLv3 or later
+ * \brief Implementation of the CAST5 (aka CAST-128) cipher algorithm as described in RFC 2144
+ * 
+ */
+
+#ifndef CAST5_H_
+#define CAST5_H_ 
+
+#include <stdint.h> 
+/* fallback bool type; the typedef is skipped as soon as one of BOOL, __BOOL or __BOOL__ is already defined */
+#ifndef BOOL
+#define BOOL
+ #ifndef __BOOL
+ #define __BOOL
+  #ifndef __BOOL__
+  #define __BOOL__
+	typedef enum{false=0,true=1} bool;
+  #endif
+ #endif
+#endif
+
+/** \typedef cast5_ctx_t
+ * \brief CAST-5 context
+ * 
+ * A variable of this type may hold a keyschedule for the CAST-5 cipher. 
+ * This context is normally generated by the 
+ * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
+ */
+typedef struct cast5_ctx_st{
+	uint32_t	mask[16];	/* masking keys, one per round */
+	uint8_t		rotl[8];	/* 4 bit from every rotation key is stored here */
+	uint8_t		roth[2];	/* 1 bit from every rotation key is stored here */
+	bool		shortkey;	/* set by cast5_init for keys of at most 80 bits (12 rounds instead of 16) */
+} cast5_ctx_t;
+
+
+/** \fn void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s);
+ * \brief generate keyschedule/context for CAST-5
+ * 
+ * This function generates the keyschedule from the supplied key for the 
+ * CAST-5 cipher and stores it in a supplied ::cast5_ctx_t context.
+ * \param key pointer to the key
+ * \param keylength_b length of the key in bits (maximum 128 bits)
+ * \param s pointer to the context
+ */
+void cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s);
+
+/** \fn void cast5_enc(void* block, const cast5_ctx_t* s);
+ * \brief encrypt a block with the CAST-5 algorithm
+ * 
+ * This function encrypts a block of 64 bits (8 bytes) in place with the CAST-5 algorithm.
+ * It uses a keyschedule as generated by the 
+ * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
+ * \param block pointer to the block which gets encrypted
+ * \param s pointer to the keyschedule/context
+ */
+void cast5_enc(void* block, const cast5_ctx_t* s);
+
+/** \fn void cast5_dec(void* block, const cast5_ctx_t* s);
+ * \brief decrypt a block with the CAST-5 algorithm
+ * 
+ * This function decrypts a block of 64 bits (8 bytes) in place with the CAST-5 algorithm.
+ * It uses a keyschedule as generated by the 
+ * cast5_init(const void* key, uint16_t keylength_b, cast5_ctx_t* s) function.
+ * \param block pointer to the block which gets decrypted
+ * \param s pointer to the keyschedule/context
+ */
+void cast5_dec(void* block, const cast5_ctx_t* s);
+
+
+
+#endif
+
diff --git a/cast6.c b/cast6.c
deleted file mode 100644
index c4922fc..0000000
--- a/cast6.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/* 
- * File:	cast6.c
- * Author:	Daniel Otte
- * Date: 	09.09.2006
- * License: GPL
- * Description: Implementation of the CAST6 (aka CAST-256) cipher algorithm as described in RFC 2612
- * 
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "cast6.h"
-#include "cast6_sboxes.h"
-
-#define CHANGE_ENDIAN32(x) (((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8)&0xffffffff)
-
-
-static
-uint8_t kr(uint8_t i, const cast6_ctx_t* ctx){
-	uint8_t ret;
-	ret = ctx->krx[i/2];
-	if(i&1){
-		ret >>= 4;
-	}else{
-		ret &= 0x0f;
-	}
-	/* now get the high bit */
-	ret |= ((ctx->krx[24+i/8])&(1<<(i%8)))?0x10:0x00;
-	return ret;
-}
-
-static
-void set_kr(uint8_t value, uint8_t i, cast6_ctx_t* ctx){
-	value &= 0x1F;
-	
-	(ctx->krx[i/2]) &= 0xF0>>((i&1)*4); /* clear the location where v should go */
-	(ctx->krx[i/2]) |= (value&0x0f)<<((i&1)*4); 
-	
-	/* now set the high bit */
-	(ctx->krx[24+i/8]) &= ~(1<<(i%8)); /* clear the location where v should go */
-	(ctx->krx[24+i/8]) |= (value>>4)<<(i%8); 
-}
-
-#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n))))
-#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n))))
-
-#define S1(a) (pgm_read_dword(&(s1[(a)])))
-#define S2(a) (pgm_read_dword(&(s2[(a)])))
-#define S3(a) (pgm_read_dword(&(s3[(a)])))
-#define S4(a) (pgm_read_dword(&(s4[(a)])))
-
-#define A ((uint8_t)(v>>(8*3)))
-#define B ((uint8_t)(v>>(8*2)))
-#define C ((uint8_t)(v>>(8*1)))
-#define D ((uint8_t)(v>>(8*0)))
-
-
-static
-uint32_t f1(uint32_t v, uint8_t kri, uint32_t kmi){
-	uint32_t o;
-	kri &= 0x1F;
-	v  = ROTL32(kmi+v, kri);
-	o  = S1(A);
-	o ^= S2(B);
-	o -= S3(C);
-	o += S4(D);
-	return o;
-}
-
-static
-uint32_t f2(uint32_t v, uint8_t kri, uint32_t kmi){
-	uint32_t o;
-	kri &= 0x1F;
-	v  = ROTL32(kmi^v, kri);
-	o  = S1(A);
-	o -= S2(B);
-	o += S3(C);
-	o ^= S4(D);
-	return o;
-}
-
-static
-uint32_t f3(uint32_t v, uint8_t kri, uint32_t kmi){
-	uint32_t o;
-	kri &= 0x1F;
-	v  = ROTL32(kmi-v, kri);
-	o  = S1(A);
-	o += S2(B);
-	o ^= S3(C);
-	o -= S4(D);
-	return o;
-}
-
-#undef A
-#undef B
-#undef C
-#undef D
-
-#define A (((uint32_t*)buffer)[0])
-#define B (((uint32_t*)buffer)[1])
-#define C (((uint32_t*)buffer)[2])
-#define D (((uint32_t*)buffer)[3])
-
-static
-void q(void* buffer, uint8_t i, const cast6_ctx_t* ctx){
-	C ^= f1(D, kr(i*4+0, ctx), ctx->km[i][0]);
-	B ^= f2(C, kr(i*4+1, ctx), ctx->km[i][1]);
-	A ^= f3(B, kr(i*4+2, ctx), ctx->km[i][2]);
-	D ^= f1(A, kr(i*4+3, ctx), ctx->km[i][3]);
-}
-
-static
-void qbar(void* buffer, uint8_t i, const cast6_ctx_t* ctx){
-	D ^= f1(A, kr(i*4+3, ctx), ctx->km[i][3]);
-	A ^= f3(B, kr(i*4+2, ctx), ctx->km[i][2]);
-	B ^= f2(C, kr(i*4+1, ctx), ctx->km[i][1]);
-	C ^= f1(D, kr(i*4+0, ctx), ctx->km[i][0]);
-}
-
-void cast6_enc(void* buffer, const cast6_ctx_t* ctx){
-	uint8_t i;
-	for(i=0; i<32/4; ++i){
-		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
-	}
-	for(i=0; i<6; ++i){
-		q(buffer, i, ctx);
-	}
-	for(i=6; i<12; ++i){
-		qbar(buffer, i, ctx);
-	}
-	for(i=0; i<32/4; ++i){
-		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
-	}
-}
-
-void cast6_dec(void* buffer, const cast6_ctx_t* ctx){
-	uint8_t i;
-	for(i=0; i<32/4; ++i){
-		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
-	}
-	for(i=12; i>6; --i){
-		q(buffer, i-1, ctx);
-	}
-	for(i=6; i>0; --i){
-		qbar(buffer, i-1, ctx);
-	}
-	for(i=0; i<32/4; ++i){
-		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
-	}
-}
-
-
-#undef A
-#undef B
-#undef C
-#undef D
-
-#define A (((uint32_t*)buffer)[0])
-#define B (((uint32_t*)buffer)[1])
-#define C (((uint32_t*)buffer)[2])
-#define D (((uint32_t*)buffer)[3])
-#define E (((uint32_t*)buffer)[4])
-#define F (((uint32_t*)buffer)[5])
-#define G (((uint32_t*)buffer)[6])
-#define H (((uint32_t*)buffer)[7])
-
-/*
- * we might later make it optional to use this small thing
-static
-void w(void* buffer, uint8_t* tr, uint32_t* tm){
-	G ^= f1(H, (tr[0]&0x0f)+(tr[5]&0x01)?0x10:0x00, tm[0]);
-	F ^= f2(G, (tr[0]>>4)  +(tr[5]&0x02)?0x10:0x00, tm[1]);
-	E ^= f3(F, (tr[1]&0x0f)+(tr[5]&0x04)?0x10:0x00, tm[2]);
-	D ^= f1(E, (tr[1]>>4)  +(tr[5]&0x08)?0x10:0x00, tm[3]);
-	C ^= f2(D, (tr[2]&0x0f)+(tr[5]&0x10)?0x10:0x00, tm[4]);
-	B ^= f3(C, (tr[2]>>4)  +(tr[5]&0x20)?0x10:0x00, tm[5]);
-	A ^= f1(B, (tr[3]&0x0f)+(tr[5]&0x40)?0x10:0x00, tm[6]);
-	H ^= f2(A, (tr[3]>>4)  +(tr[5]&0x80)?0x10:0x00, tm[7]);
-}
-*/
-static
-void w(void* buffer, uint8_t* tr, uint32_t* tm){
-	G ^= f1(H, tr[0], tm[0]);
-	F ^= f2(G, tr[1], tm[1]);
-	E ^= f3(F, tr[2], tm[2]);
-	D ^= f1(E, tr[3], tm[3]);
-	C ^= f2(D, tr[4], tm[4]);
-	B ^= f3(C, tr[5], tm[5]);
-	A ^= f1(B, tr[6], tm[6]);
-	H ^= f2(A, tr[7], tm[7]);
-}
-
-/*
-void dump_ctx(const cast6_ctx_t* ctx){
-	uint8_t i,t;
-	cli_putstr_P(PSTR("\r\n DBG:"));
-	for(i=0; i<12; ++i){
-		cli_putstr_P(PSTR("\r\n DBG:"));
-		cli_putstr_P(PSTR(" rotk1="));
-		t=kr(i*4+0, ctx);
-		cli_hexdump(&t,1);
-		cli_putstr_P(PSTR("          rotk2="));
-		t=kr(i*4+1, ctx);
-		cli_hexdump(&t,1);
-		cli_putstr_P(PSTR("          rotk3="));
-		t=kr(i*4+2, ctx);
-		cli_hexdump(&t,1);
-		cli_putstr_P(PSTR("          rotk4="));
-		t=kr(i*4+3, ctx);
-		cli_hexdump(&t,1);
-		cli_putstr_P(PSTR("\r\n     "));
-		cli_putstr_P(PSTR(" mask1="));
-		cli_hexdump(&(ctx->km[i][0]),4);
-		cli_putstr_P(PSTR(" mask2="));
-		cli_hexdump(&(ctx->km[i][1]),4);
-		cli_putstr_P(PSTR(" mask3="));
-		cli_hexdump(&(ctx->km[i][2]),4);
-		cli_putstr_P(PSTR(" mask4="));
-		cli_hexdump(&(ctx->km[i][3]),4);
-		cli_putstr_P(PSTR("\r\n;-----"));
-	}
-}
-*/
-
-#define CR 19
-#define CM 0x5A827999
-#define MR 17 
-#define MM 0x6ED9EBA1
-
-void cast6_init(const void* key, uint16_t keysize_b, cast6_ctx_t* ctx){
-	uint8_t  buffer[32];
-	uint8_t  cr=CR, tr[8];
-	uint32_t cm=CM, tm[8];
-	uint8_t i,j;
-	
-	memset(buffer, 0, 32);
-	memcpy(buffer, key, (keysize_b+7)/8);
-	for(i=0; i<32/4; ++i){
-		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
-	}
-
-	for(i=0; i<24; ++i){
-		for(j=0; j<8; ++j){
-			tm[j] = cm;
-			cm += MM;
-			tr[j] = cr&0x1F;
-			cr += MR;
-		}
-		w(buffer, tr, tm);
-
-		if(i&1){
-			j=i/2;
-			ctx->km[j][0]=H;
-			ctx->km[j][1]=F;
-			ctx->km[j][2]=D;
-			ctx->km[j][3]=B;
-			set_kr((uint8_t)A,j*4+0,ctx);
-			set_kr((uint8_t)C,j*4+1,ctx);
-			set_kr((uint8_t)E,j*4+2,ctx);
-			set_kr((uint8_t)G,j*4+3,ctx);
-		}
-	}
-}
-
-
-
diff --git a/cast6.h b/cast6.h
deleted file mode 100644
index dc3c7bb..0000000
--- a/cast6.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef CAST6_H_
-#define CAST6_H_
-
-#include <stdint.h>
-
-#define CAST6_ROUNDS 12
-
-/* size of this is 222 byte (HUGE) */
-typedef struct cast6_ctx_st{
-	uint32_t	km[12][4];
-	uint8_t		krx[4*12*5/8]; /* these are packed */
-} cast6_ctx_t;
-
-
-
-void cast6_enc(void* buffer, const cast6_ctx_t* ctx);
-void cast6_dec(void* buffer, const cast6_ctx_t* ctx);
-void cast6_init(const void* key, uint16_t keysize_b, cast6_ctx_t* ctx);
-
-
-#endif /*CAST6_H_*/
-
diff --git a/cast6/cast6.c b/cast6/cast6.c
new file mode 100644
index 0000000..c4922fc
--- /dev/null
+++ b/cast6/cast6.c
@@ -0,0 +1,267 @@
+/* 
+ * File:	cast6.c
+ * Author:	Daniel Otte
+ * Date: 	09.09.2006
+ * License: GPL
+ * Description: Implementation of the CAST6 (aka CAST-256) cipher algorithm as described in RFC 2612
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "cast6.h"
+#include "cast6_sboxes.h"
+/* CHANGE_ENDIAN32: reverse the byte order of a 32-bit value; note that x is expanded four times */
+#define CHANGE_ENDIAN32(x) (((x)<<24 | (x)>>24 | ((x)&0xff00)<<8 | ((x)&0xff0000)>>8)&0xffffffff)
+/*
+ * kr: return the 5-bit rotation key number i from the packed krx array.
+ * The low four bits sit in krx[i/2], the fifth bit in krx[24+i/8].
+ */
+static
+uint8_t kr(uint8_t i, const cast6_ctx_t* ctx){
+	const uint8_t packed = ctx->krx[i>>1];
+	/* odd indices live in the upper nibble, even ones in the lower */
+	uint8_t key = (i&1) ? (packed>>4) : (packed&0x0f);
+
+	if((ctx->krx[24+(i>>3)]) & (1<<(i&7))){
+		key |= 0x10;
+	}
+	return key;
+}
+/* set_kr: store the low five bits of value as rotation key number i, leaving all other keys in krx untouched */
+static
+void set_kr(uint8_t value, uint8_t i, cast6_ctx_t* ctx){
+	uint8_t *nibbles = &ctx->krx[i>>1];		/* byte shared by keys i and i^1 */
+	uint8_t *highbits = &ctx->krx[24+(i>>3)];	/* byte holding the fifth bit of eight keys */
+	const uint8_t bit = i&7;
+	if(i&1){
+		*nibbles = (*nibbles&0x0F) | ((value&0x0f)<<4);
+	}else{
+		*nibbles = (*nibbles&0xF0) | (value&0x0f);
+	}
+	*highbits = (*highbits & ~(1<<bit)) | (((value&0x1F)>>4)<<bit); }
+
+#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n))))
+#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n))))
+
+#define S1(a) (pgm_read_dword(&(s1[(a)])))
+#define S2(a) (pgm_read_dword(&(s2[(a)])))
+#define S3(a) (pgm_read_dword(&(s3[(a)])))
+#define S4(a) (pgm_read_dword(&(s4[(a)])))
+
+#define A ((uint8_t)(v>>(8*3)))
+#define B ((uint8_t)(v>>(8*2)))
+#define C ((uint8_t)(v>>(8*1)))
+#define D ((uint8_t)(v>>(8*0)))
+/*
+ * f1: I = (kmi + v) rotated left by the low five bits of kri;
+ * the result is ((S1[Ia] ^ S2[Ib]) - S3[Ic]) + S4[Id], Ia being the top byte of I.
+ */
+static uint32_t f1(uint32_t v, uint8_t kri, uint32_t kmi){
+	uint32_t acc;
+
+	v = ROTL32(kmi+v, kri&0x1F);	/* v now holds I; the A..D macros read its bytes */
+	acc = S1(A) ^ S2(B);
+	acc -= S3(C);
+	acc += S4(D);
+	return acc;
+}
+/*
+ * f2: I = (kmi ^ v) rotated left by the low five bits of kri;
+ * the result is ((S1[Ia] - S2[Ib]) + S3[Ic]) ^ S4[Id], Ia being the top byte of I.
+ */
+static uint32_t f2(uint32_t v, uint8_t kri, uint32_t kmi){
+	uint32_t acc;
+	v = ROTL32(kmi^v, kri&0x1F);	/* v now holds I; the A..D macros read its bytes */
+	acc = S1(A) - S2(B);
+	acc += S3(C);
+	acc ^= S4(D);
+	return acc;
+}
+/*
+ * f3: I = (kmi - v) rotated left by the low five bits of kri;
+ * the result is ((S1[Ia] + S2[Ib]) ^ S3[Ic]) - S4[Id], Ia being the top byte of I.
+ */
+static uint32_t f3(uint32_t v, uint8_t kri, uint32_t kmi){
+	uint32_t acc;
+	v = ROTL32(kmi-v, kri&0x1F);	/* v now holds I; the A..D macros read its bytes */
+	acc = S1(A) + S2(B);
+	acc ^= S3(C);
+	acc -= S4(D);
+	return acc;
+}
+/* A..D are redefined: from here on they name the first four 32-bit words behind the pointer called "buffer" */
+#undef A
+#undef B
+#undef C
+#undef D
+
+#define A (((uint32_t*)buffer)[0])
+#define B (((uint32_t*)buffer)[1])
+#define C (((uint32_t*)buffer)[2])
+#define D (((uint32_t*)buffer)[3])
+/* q: apply the four key pairs of round i to the block words in the order C, B, A, D (each step feeds the next) */
+static void q(void* buffer, uint8_t i, const cast6_ctx_t* ctx){
+	const uint32_t *km = ctx->km[i];	/* the four masking keys of round i */
+	C ^= f1(D, kr(4*i,   ctx), km[0]);
+	B ^= f2(C, kr(4*i+1, ctx), km[1]);
+	A ^= f3(B, kr(4*i+2, ctx), km[2]);
+	D ^= f1(A, kr(4*i+3, ctx), km[3]);
+}
+/* qbar: the four steps of q for round i applied in the opposite order D, A, B, C */
+static void qbar(void* buffer, uint8_t i, const cast6_ctx_t* ctx){
+	const uint32_t *km = ctx->km[i];	/* the four masking keys of round i */
+	D ^= f1(A, kr(4*i+3, ctx), km[3]);
+	A ^= f3(B, kr(4*i+2, ctx), km[2]);
+	B ^= f2(C, kr(4*i+1, ctx), km[1]);
+	C ^= f1(D, kr(4*i,   ctx), km[0]);
+}
+/* cast6_enc: encrypt the 16-byte block at buffer in place with the keys in ctx (six q rounds, then six qbar rounds) */
+void cast6_enc(void* buffer, const cast6_ctx_t* ctx){
+	uint8_t i;
+	for(i=0; i<16/4; ++i){ /* byte-swap exactly the four words of the block; nothing behind it is touched */
+		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
+	}
+	for(i=0; i<6; ++i){
+		q(buffer, i, ctx);
+	}
+	for(i=6; i<12; ++i){
+		qbar(buffer, i, ctx);
+	}
+	for(i=0; i<16/4; ++i){ /* convert the result back to the caller's byte order */
+		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
+	}
+}
+/* cast6_dec: decrypt the 16-byte block at buffer in place; runs the rounds of cast6_enc from 11 down to 0 with q and qbar exchanged */
+void cast6_dec(void* buffer, const cast6_ctx_t* ctx){
+	uint8_t i;
+	for(i=0; i<16/4; ++i){ /* byte-swap exactly the four words of the block; nothing behind it is touched */
+		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
+	}
+	for(i=12; i>6; --i){ /* i-1 keeps the unsigned counter from wrapping below zero */
+		q(buffer, i-1, ctx);
+	}
+	for(i=6; i>0; --i){
+		qbar(buffer, i-1, ctx);
+	}
+	for(i=0; i<16/4; ++i){ /* convert the result back to the caller's byte order */
+		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
+	}
+}
+
+/* A..H: the eight 32-bit words behind the pointer called "buffer" (the 32-byte key buffer in cast6_init) */
+#undef A
+#undef B
+#undef C
+#undef D
+
+#define A (((uint32_t*)buffer)[0])
+#define B (((uint32_t*)buffer)[1])
+#define C (((uint32_t*)buffer)[2])
+#define D (((uint32_t*)buffer)[3])
+#define E (((uint32_t*)buffer)[4])
+#define F (((uint32_t*)buffer)[5])
+#define G (((uint32_t*)buffer)[6])
+#define H (((uint32_t*)buffer)[7])
+/* we might later make it optional to use this small thing
+ * NOTE(review): in the disabled variant below "a + b ? 0x10 : 0x00" parses as "(a + b) ? ..."; parenthesise before enabling it
+static
+void w(void* buffer, uint8_t* tr, uint32_t* tm){
+	G ^= f1(H, (tr[0]&0x0f)+(tr[5]&0x01)?0x10:0x00, tm[0]);
+	F ^= f2(G, (tr[0]>>4)  +(tr[5]&0x02)?0x10:0x00, tm[1]);
+	E ^= f3(F, (tr[1]&0x0f)+(tr[5]&0x04)?0x10:0x00, tm[2]);
+	D ^= f1(E, (tr[1]>>4)  +(tr[5]&0x08)?0x10:0x00, tm[3]);
+	C ^= f2(D, (tr[2]&0x0f)+(tr[5]&0x10)?0x10:0x00, tm[4]);
+	B ^= f3(C, (tr[2]>>4)  +(tr[5]&0x20)?0x10:0x00, tm[5]);
+	A ^= f1(B, (tr[3]&0x0f)+(tr[5]&0x40)?0x10:0x00, tm[6]);
+	H ^= f2(A, (tr[3]>>4)  +(tr[5]&0x80)?0x10:0x00, tm[7]);
+}
+*/
+/* w: eight chained steps over the words A..H of buffer; step k uses rotation key tr[k] and masking key tm[k] */
+static
+void w(void* buffer, uint8_t* tr, uint32_t* tm){
+	G ^= f1(H, tr[0], tm[0]); /* each step XORs f(previously updated word) into the next word */
+	F ^= f2(G, tr[1], tm[1]);
+	E ^= f3(F, tr[2], tm[2]);
+	D ^= f1(E, tr[3], tm[3]);
+	C ^= f2(D, tr[4], tm[4]);
+	B ^= f3(C, tr[5], tm[5]);
+	A ^= f1(B, tr[6], tm[6]);
+	H ^= f2(A, tr[7], tm[7]);
+}
+
+/*
+void dump_ctx(const cast6_ctx_t* ctx){
+	uint8_t i,t;
+	cli_putstr_P(PSTR("\r\n DBG:"));
+	for(i=0; i<12; ++i){
+		cli_putstr_P(PSTR("\r\n DBG:"));
+		cli_putstr_P(PSTR(" rotk1="));
+		t=kr(i*4+0, ctx);
+		cli_hexdump(&t,1);
+		cli_putstr_P(PSTR("          rotk2="));
+		t=kr(i*4+1, ctx);
+		cli_hexdump(&t,1);
+		cli_putstr_P(PSTR("          rotk3="));
+		t=kr(i*4+2, ctx);
+		cli_hexdump(&t,1);
+		cli_putstr_P(PSTR("          rotk4="));
+		t=kr(i*4+3, ctx);
+		cli_hexdump(&t,1);
+		cli_putstr_P(PSTR("\r\n     "));
+		cli_putstr_P(PSTR(" mask1="));
+		cli_hexdump(&(ctx->km[i][0]),4);
+		cli_putstr_P(PSTR(" mask2="));
+		cli_hexdump(&(ctx->km[i][1]),4);
+		cli_putstr_P(PSTR(" mask3="));
+		cli_hexdump(&(ctx->km[i][2]),4);
+		cli_putstr_P(PSTR(" mask4="));
+		cli_hexdump(&(ctx->km[i][3]),4);
+		cli_putstr_P(PSTR("\r\n;-----"));
+	}
+}
+*/
+/* key-schedule constants: cast6_init starts cr at CR and cm at CM, then adds MR to cr and MM to cm after every use */
+#define CR 19
+#define CM 0x5A827999
+#define MR 17 
+#define MM 0x6ED9EBA1
+/* cast6_init: derive the masking and rotation keys of all 12 rounds in *ctx from keysize_b bits at key (at most 256 bits are used) */
+void cast6_init(const void* key, uint16_t keysize_b, cast6_ctx_t* ctx){
+	uint8_t  buffer[32];
+	uint8_t  cr=CR, tr[8];
+	uint32_t cm=CM, tm[8];
+	uint8_t i,j;
+	if(keysize_b > 256){ keysize_b = 256; } /* buffer holds 32 bytes; a longer length would overflow it in memcpy */
+	memset(buffer, 0, 32); /* shorter keys are zero padded */
+	memcpy(buffer, key, (keysize_b+7)/8);
+	for(i=0; i<32/4; ++i){
+		((uint32_t*)buffer)[i] = CHANGE_ENDIAN32(((uint32_t*)buffer)[i]);
+	}
+
+	for(i=0; i<24; ++i){
+		for(j=0; j<8; ++j){ /* fresh temporary keys for this pass of w */
+			tm[j] = cm;
+			cm += MM;
+			tr[j] = cr&0x1F;
+			cr += MR;
+		}
+		w(buffer, tr, tm);
+
+		if(i&1){ /* every second pass yields the keys of one round */
+			j=i/2;
+			ctx->km[j][0]=H;
+			ctx->km[j][1]=F;
+			ctx->km[j][2]=D;
+			ctx->km[j][3]=B;
+			set_kr((uint8_t)A,j*4+0,ctx); /* set_kr keeps only the low five bits */
+			set_kr((uint8_t)C,j*4+1,ctx);
+			set_kr((uint8_t)E,j*4+2,ctx);
+			set_kr((uint8_t)G,j*4+3,ctx);
+		}
+	}
+}
+
+
+
diff --git a/cast6/cast6.h b/cast6/cast6.h
new file mode 100644
index 0000000..dc3c7bb
--- /dev/null
+++ b/cast6/cast6.h
@@ -0,0 +1,22 @@
+#ifndef CAST6_H_
+#define CAST6_H_
+
+#include <stdint.h>
+/* number of rounds; cast6_ctx_t below stores keys for 12 rounds */
+#define CAST6_ROUNDS 12
+
+/* size of this is 222 byte (HUGE): 12*4*4 bytes of masking keys plus 30 bytes of rotation keys */
+typedef struct cast6_ctx_st{
+	uint32_t	km[12][4];	/* masking keys, four per round */
+	uint8_t		krx[4*12*5/8]; /* these are packed: 48 rotation keys of 5 bits each */
+} cast6_ctx_t;
+
+/* cast6_enc / cast6_dec transform the block at buffer in place; ctx must have been filled by cast6_init */
+/* cast6_init fills ctx from the key at key, keysize_b being the key length in bits */
+void cast6_enc(void* buffer, const cast6_ctx_t* ctx);
+void cast6_dec(void* buffer, const cast6_ctx_t* ctx);
+void cast6_init(const void* key, uint16_t keysize_b, cast6_ctx_t* ctx);
+
+
+#endif /*CAST6_H_*/
+
diff --git a/cast6/cast6_sboxes.h b/cast6/cast6_sboxes.h
new file mode 100644
index 0000000..88bd81e
--- /dev/null
+++ b/cast6/cast6_sboxes.h
@@ -0,0 +1,144 @@
+#ifndef CAST6_SBOXES_H_
+#define CAST6_SBOXES_H_
+
+
+uint32_t s1[] PROGMEM = {
+0x30fb40d4UL, 0x9fa0ff0bUL, 0x6beccd2fUL, 0x3f258c7aUL, 0x1e213f2fUL, 0x9c004dd3UL, 0x6003e540UL, 0xcf9fc949UL,
+0xbfd4af27UL, 0x88bbbdb5UL, 0xe2034090UL, 0x98d09675UL, 0x6e63a0e0UL, 0x15c361d2UL, 0xc2e7661dUL, 0x22d4ff8eUL,
+0x28683b6fUL, 0xc07fd059UL, 0xff2379c8UL, 0x775f50e2UL, 0x43c340d3UL, 0xdf2f8656UL, 0x887ca41aUL, 0xa2d2bd2dUL,
+0xa1c9e0d6UL, 0x346c4819UL, 0x61b76d87UL, 0x22540f2fUL, 0x2abe32e1UL, 0xaa54166bUL, 0x22568e3aUL, 0xa2d341d0UL,
+0x66db40c8UL, 0xa784392fUL, 0x004dff2fUL, 0x2db9d2deUL, 0x97943facUL, 0x4a97c1d8UL, 0x527644b7UL, 0xb5f437a7UL,
+0xb82cbaefUL, 0xd751d159UL, 0x6ff7f0edUL, 0x5a097a1fUL, 0x827b68d0UL, 0x90ecf52eUL, 0x22b0c054UL, 0xbc8e5935UL,
+0x4b6d2f7fUL, 0x50bb64a2UL, 0xd2664910UL, 0xbee5812dUL, 0xb7332290UL, 0xe93b159fUL, 0xb48ee411UL, 0x4bff345dUL,
+0xfd45c240UL, 0xad31973fUL, 0xc4f6d02eUL, 0x55fc8165UL, 0xd5b1caadUL, 0xa1ac2daeUL, 0xa2d4b76dUL, 0xc19b0c50UL,
+0x882240f2UL, 0x0c6e4f38UL, 0xa4e4bfd7UL, 0x4f5ba272UL, 0x564c1d2fUL, 0xc59c5319UL, 0xb949e354UL, 0xb04669feUL,
+0xb1b6ab8aUL, 0xc71358ddUL, 0x6385c545UL, 0x110f935dUL, 0x57538ad5UL, 0x6a390493UL, 0xe63d37e0UL, 0x2a54f6b3UL,
+0x3a787d5fUL, 0x6276a0b5UL, 0x19a6fcdfUL, 0x7a42206aUL, 0x29f9d4d5UL, 0xf61b1891UL, 0xbb72275eUL, 0xaa508167UL,
+0x38901091UL, 0xc6b505ebUL, 0x84c7cb8cUL, 0x2ad75a0fUL, 0x874a1427UL, 0xa2d1936bUL, 0x2ad286afUL, 0xaa56d291UL,
+0xd7894360UL, 0x425c750dUL, 0x93b39e26UL, 0x187184c9UL, 0x6c00b32dUL, 0x73e2bb14UL, 0xa0bebc3cUL, 0x54623779UL,
+0x64459eabUL, 0x3f328b82UL, 0x7718cf82UL, 0x59a2cea6UL, 0x04ee002eUL, 0x89fe78e6UL, 0x3fab0950UL, 0x325ff6c2UL,
+0x81383f05UL, 0x6963c5c8UL, 0x76cb5ad6UL, 0xd49974c9UL, 0xca180dcfUL, 0x380782d5UL, 0xc7fa5cf6UL, 0x8ac31511UL,
+0x35e79e13UL, 0x47da91d0UL, 0xf40f9086UL, 0xa7e2419eUL, 0x31366241UL, 0x051ef495UL, 0xaa573b04UL, 0x4a805d8dUL,
+0x548300d0UL, 0x00322a3cUL, 0xbf64cddfUL, 0xba57a68eUL, 0x75c6372bUL, 0x50afd341UL, 0xa7c13275UL, 0x915a0bf5UL,
+0x6b54bfabUL, 0x2b0b1426UL, 0xab4cc9d7UL, 0x449ccd82UL, 0xf7fbf265UL, 0xab85c5f3UL, 0x1b55db94UL, 0xaad4e324UL,
+0xcfa4bd3fUL, 0x2deaa3e2UL, 0x9e204d02UL, 0xc8bd25acUL, 0xeadf55b3UL, 0xd5bd9e98UL, 0xe31231b2UL, 0x2ad5ad6cUL,
+0x954329deUL, 0xadbe4528UL, 0xd8710f69UL, 0xaa51c90fUL, 0xaa786bf6UL, 0x22513f1eUL, 0xaa51a79bUL, 0x2ad344ccUL,
+0x7b5a41f0UL, 0xd37cfbadUL, 0x1b069505UL, 0x41ece491UL, 0xb4c332e6UL, 0x032268d4UL, 0xc9600accUL, 0xce387e6dUL,
+0xbf6bb16cUL, 0x6a70fb78UL, 0x0d03d9c9UL, 0xd4df39deUL, 0xe01063daUL, 0x4736f464UL, 0x5ad328d8UL, 0xb347cc96UL,
+0x75bb0fc3UL, 0x98511bfbUL, 0x4ffbcc35UL, 0xb58bcf6aUL, 0xe11f0abcUL, 0xbfc5fe4aUL, 0xa70aec10UL, 0xac39570aUL,
+0x3f04442fUL, 0x6188b153UL, 0xe0397a2eUL, 0x5727cb79UL, 0x9ceb418fUL, 0x1cacd68dUL, 0x2ad37c96UL, 0x0175cb9dUL,
+0xc69dff09UL, 0xc75b65f0UL, 0xd9db40d8UL, 0xec0e7779UL, 0x4744ead4UL, 0xb11c3274UL, 0xdd24cb9eUL, 0x7e1c54bdUL,
+0xf01144f9UL, 0xd2240eb1UL, 0x9675b3fdUL, 0xa3ac3755UL, 0xd47c27afUL, 0x51c85f4dUL, 0x56907596UL, 0xa5bb15e6UL,
+0x580304f0UL, 0xca042cf1UL, 0x011a37eaUL, 0x8dbfaadbUL, 0x35ba3e4aUL, 0x3526ffa0UL, 0xc37b4d09UL, 0xbc306ed9UL,
+0x98a52666UL, 0x5648f725UL, 0xff5e569dUL, 0x0ced63d0UL, 0x7c63b2cfUL, 0x700b45e1UL, 0xd5ea50f1UL, 0x85a92872UL,
+0xaf1fbda7UL, 0xd4234870UL, 0xa7870bf3UL, 0x2d3b4d79UL, 0x42e04198UL, 0x0cd0ede7UL, 0x26470db8UL, 0xf881814cUL,
+0x474d6ad7UL, 0x7c0c5e5cUL, 0xd1231959UL, 0x381b7298UL, 0xf5d2f4dbUL, 0xab838653UL, 0x6e2f1e23UL, 0x83719c9eUL,
+0xbd91e046UL, 0x9a56456eUL, 0xdc39200cUL, 0x20c8c571UL, 0x962bda1cUL, 0xe1e696ffUL, 0xb141ab08UL, 0x7cca89b9UL,
+0x1a69e783UL, 0x02cc4843UL, 0xa2f7c579UL, 0x429ef47dUL, 0x427b169cUL, 0x5ac9f049UL, 0xdd8f0f00UL, 0x5c8165bfUL};
+
+uint32_t s2[] PROGMEM = {
+0x1f201094UL, 0xef0ba75bUL, 0x69e3cf7eUL, 0x393f4380UL, 0xfe61cf7aUL, 0xeec5207aUL, 0x55889c94UL, 0x72fc0651UL,
+0xada7ef79UL, 0x4e1d7235UL, 0xd55a63ceUL, 0xde0436baUL, 0x99c430efUL, 0x5f0c0794UL, 0x18dcdb7dUL, 0xa1d6eff3UL,
+0xa0b52f7bUL, 0x59e83605UL, 0xee15b094UL, 0xe9ffd909UL, 0xdc440086UL, 0xef944459UL, 0xba83ccb3UL, 0xe0c3cdfbUL,
+0xd1da4181UL, 0x3b092ab1UL, 0xf997f1c1UL, 0xa5e6cf7bUL, 0x01420ddbUL, 0xe4e7ef5bUL, 0x25a1ff41UL, 0xe180f806UL,
+0x1fc41080UL, 0x179bee7aUL, 0xd37ac6a9UL, 0xfe5830a4UL, 0x98de8b7fUL, 0x77e83f4eUL, 0x79929269UL, 0x24fa9f7bUL,
+0xe113c85bUL, 0xacc40083UL, 0xd7503525UL, 0xf7ea615fUL, 0x62143154UL, 0x0d554b63UL, 0x5d681121UL, 0xc866c359UL,
+0x3d63cf73UL, 0xcee234c0UL, 0xd4d87e87UL, 0x5c672b21UL, 0x071f6181UL, 0x39f7627fUL, 0x361e3084UL, 0xe4eb573bUL,
+0x602f64a4UL, 0xd63acd9cUL, 0x1bbc4635UL, 0x9e81032dUL, 0x2701f50cUL, 0x99847ab4UL, 0xa0e3df79UL, 0xba6cf38cUL,
+0x10843094UL, 0x2537a95eUL, 0xf46f6ffeUL, 0xa1ff3b1fUL, 0x208cfb6aUL, 0x8f458c74UL, 0xd9e0a227UL, 0x4ec73a34UL,
+0xfc884f69UL, 0x3e4de8dfUL, 0xef0e0088UL, 0x3559648dUL, 0x8a45388cUL, 0x1d804366UL, 0x721d9bfdUL, 0xa58684bbUL,
+0xe8256333UL, 0x844e8212UL, 0x128d8098UL, 0xfed33fb4UL, 0xce280ae1UL, 0x27e19ba5UL, 0xd5a6c252UL, 0xe49754bdUL,
+0xc5d655ddUL, 0xeb667064UL, 0x77840b4dUL, 0xa1b6a801UL, 0x84db26a9UL, 0xe0b56714UL, 0x21f043b7UL, 0xe5d05860UL,
+0x54f03084UL, 0x066ff472UL, 0xa31aa153UL, 0xdadc4755UL, 0xb5625dbfUL, 0x68561be6UL, 0x83ca6b94UL, 0x2d6ed23bUL,
+0xeccf01dbUL, 0xa6d3d0baUL, 0xb6803d5cUL, 0xaf77a709UL, 0x33b4a34cUL, 0x397bc8d6UL, 0x5ee22b95UL, 0x5f0e5304UL,
+0x81ed6f61UL, 0x20e74364UL, 0xb45e1378UL, 0xde18639bUL, 0x881ca122UL, 0xb96726d1UL, 0x8049a7e8UL, 0x22b7da7bUL,
+0x5e552d25UL, 0x5272d237UL, 0x79d2951cUL, 0xc60d894cUL, 0x488cb402UL, 0x1ba4fe5bUL, 0xa4b09f6bUL, 0x1ca815cfUL,
+0xa20c3005UL, 0x8871df63UL, 0xb9de2fcbUL, 0x0cc6c9e9UL, 0x0beeff53UL, 0xe3214517UL, 0xb4542835UL, 0x9f63293cUL,
+0xee41e729UL, 0x6e1d2d7cUL, 0x50045286UL, 0x1e6685f3UL, 0xf33401c6UL, 0x30a22c95UL, 0x31a70850UL, 0x60930f13UL,
+0x73f98417UL, 0xa1269859UL, 0xec645c44UL, 0x52c877a9UL, 0xcdff33a6UL, 0xa02b1741UL, 0x7cbad9a2UL, 0x2180036fUL,
+0x50d99c08UL, 0xcb3f4861UL, 0xc26bd765UL, 0x64a3f6abUL, 0x80342676UL, 0x25a75e7bUL, 0xe4e6d1fcUL, 0x20c710e6UL,
+0xcdf0b680UL, 0x17844d3bUL, 0x31eef84dUL, 0x7e0824e4UL, 0x2ccb49ebUL, 0x846a3baeUL, 0x8ff77888UL, 0xee5d60f6UL,
+0x7af75673UL, 0x2fdd5cdbUL, 0xa11631c1UL, 0x30f66f43UL, 0xb3faec54UL, 0x157fd7faUL, 0xef8579ccUL, 0xd152de58UL,
+0xdb2ffd5eUL, 0x8f32ce19UL, 0x306af97aUL, 0x02f03ef8UL, 0x99319ad5UL, 0xc242fa0fUL, 0xa7e3ebb0UL, 0xc68e4906UL,
+0xb8da230cUL, 0x80823028UL, 0xdcdef3c8UL, 0xd35fb171UL, 0x088a1bc8UL, 0xbec0c560UL, 0x61a3c9e8UL, 0xbca8f54dUL,
+0xc72feffaUL, 0x22822e99UL, 0x82c570b4UL, 0xd8d94e89UL, 0x8b1c34bcUL, 0x301e16e6UL, 0x273be979UL, 0xb0ffeaa6UL,
+0x61d9b8c6UL, 0x00b24869UL, 0xb7ffce3fUL, 0x08dc283bUL, 0x43daf65aUL, 0xf7e19798UL, 0x7619b72fUL, 0x8f1c9ba4UL,
+0xdc8637a0UL, 0x16a7d3b1UL, 0x9fc393b7UL, 0xa7136eebUL, 0xc6bcc63eUL, 0x1a513742UL, 0xef6828bcUL, 0x520365d6UL,
+0x2d6a77abUL, 0x3527ed4bUL, 0x821fd216UL, 0x095c6e2eUL, 0xdb92f2fbUL, 0x5eea29cbUL, 0x145892f5UL, 0x91584f7fUL,
+0x5483697bUL, 0x2667a8ccUL, 0x85196048UL, 0x8c4baceaUL, 0x833860d4UL, 0x0d23e0f9UL, 0x6c387e8aUL, 0x0ae6d249UL,
+0xb284600cUL, 0xd835731dUL, 0xdcb1c647UL, 0xac4c56eaUL, 0x3ebd81b3UL, 0x230eabb0UL, 0x6438bc87UL, 0xf0b5b1faUL,
+0x8f5ea2b3UL, 0xfc184642UL, 0x0a036b7aUL, 0x4fb089bdUL, 0x649da589UL, 0xa345415eUL, 0x5c038323UL, 0x3e5d3bb9UL,
+0x43d79572UL, 0x7e6dd07cUL, 0x06dfdf1eUL, 0x6c6cc4efUL, 0x7160a539UL, 0x73bfbe70UL, 0x83877605UL, 0x4523ecf1UL};
+
+uint32_t s3[] PROGMEM = {
+0x8defc240UL, 0x25fa5d9fUL, 0xeb903dbfUL, 0xe810c907UL, 0x47607fffUL, 0x369fe44bUL, 0x8c1fc644UL, 0xaececa90UL,
+0xbeb1f9bfUL, 0xeefbcaeaUL, 0xe8cf1950UL, 0x51df07aeUL, 0x920e8806UL, 0xf0ad0548UL, 0xe13c8d83UL, 0x927010d5UL,
+0x11107d9fUL, 0x07647db9UL, 0xb2e3e4d4UL, 0x3d4f285eUL, 0xb9afa820UL, 0xfade82e0UL, 0xa067268bUL, 0x8272792eUL,
+0x553fb2c0UL, 0x489ae22bUL, 0xd4ef9794UL, 0x125e3fbcUL, 0x21fffceeUL, 0x825b1bfdUL, 0x9255c5edUL, 0x1257a240UL,
+0x4e1a8302UL, 0xbae07fffUL, 0x528246e7UL, 0x8e57140eUL, 0x3373f7bfUL, 0x8c9f8188UL, 0xa6fc4ee8UL, 0xc982b5a5UL,
+0xa8c01db7UL, 0x579fc264UL, 0x67094f31UL, 0xf2bd3f5fUL, 0x40fff7c1UL, 0x1fb78dfcUL, 0x8e6bd2c1UL, 0x437be59bUL,
+0x99b03dbfUL, 0xb5dbc64bUL, 0x638dc0e6UL, 0x55819d99UL, 0xa197c81cUL, 0x4a012d6eUL, 0xc5884a28UL, 0xccc36f71UL,
+0xb843c213UL, 0x6c0743f1UL, 0x8309893cUL, 0x0feddd5fUL, 0x2f7fe850UL, 0xd7c07f7eUL, 0x02507fbfUL, 0x5afb9a04UL,
+0xa747d2d0UL, 0x1651192eUL, 0xaf70bf3eUL, 0x58c31380UL, 0x5f98302eUL, 0x727cc3c4UL, 0x0a0fb402UL, 0x0f7fef82UL,
+0x8c96fdadUL, 0x5d2c2aaeUL, 0x8ee99a49UL, 0x50da88b8UL, 0x8427f4a0UL, 0x1eac5790UL, 0x796fb449UL, 0x8252dc15UL,
+0xefbd7d9bUL, 0xa672597dUL, 0xada840d8UL, 0x45f54504UL, 0xfa5d7403UL, 0xe83ec305UL, 0x4f91751aUL, 0x925669c2UL,
+0x23efe941UL, 0xa903f12eUL, 0x60270df2UL, 0x0276e4b6UL, 0x94fd6574UL, 0x927985b2UL, 0x8276dbcbUL, 0x02778176UL,
+0xf8af918dUL, 0x4e48f79eUL, 0x8f616ddfUL, 0xe29d840eUL, 0x842f7d83UL, 0x340ce5c8UL, 0x96bbb682UL, 0x93b4b148UL,
+0xef303cabUL, 0x984faf28UL, 0x779faf9bUL, 0x92dc560dUL, 0x224d1e20UL, 0x8437aa88UL, 0x7d29dc96UL, 0x2756d3dcUL,
+0x8b907ceeUL, 0xb51fd240UL, 0xe7c07ce3UL, 0xe566b4a1UL, 0xc3e9615eUL, 0x3cf8209dUL, 0x6094d1e3UL, 0xcd9ca341UL,
+0x5c76460eUL, 0x00ea983bUL, 0xd4d67881UL, 0xfd47572cUL, 0xf76cedd9UL, 0xbda8229cUL, 0x127dadaaUL, 0x438a074eUL,
+0x1f97c090UL, 0x081bdb8aUL, 0x93a07ebeUL, 0xb938ca15UL, 0x97b03cffUL, 0x3dc2c0f8UL, 0x8d1ab2ecUL, 0x64380e51UL,
+0x68cc7bfbUL, 0xd90f2788UL, 0x12490181UL, 0x5de5ffd4UL, 0xdd7ef86aUL, 0x76a2e214UL, 0xb9a40368UL, 0x925d958fUL,
+0x4b39fffaUL, 0xba39aee9UL, 0xa4ffd30bUL, 0xfaf7933bUL, 0x6d498623UL, 0x193cbcfaUL, 0x27627545UL, 0x825cf47aUL,
+0x61bd8ba0UL, 0xd11e42d1UL, 0xcead04f4UL, 0x127ea392UL, 0x10428db7UL, 0x8272a972UL, 0x9270c4a8UL, 0x127de50bUL,
+0x285ba1c8UL, 0x3c62f44fUL, 0x35c0eaa5UL, 0xe805d231UL, 0x428929fbUL, 0xb4fcdf82UL, 0x4fb66a53UL, 0x0e7dc15bUL,
+0x1f081fabUL, 0x108618aeUL, 0xfcfd086dUL, 0xf9ff2889UL, 0x694bcc11UL, 0x236a5caeUL, 0x12deca4dUL, 0x2c3f8cc5UL,
+0xd2d02dfeUL, 0xf8ef5896UL, 0xe4cf52daUL, 0x95155b67UL, 0x494a488cUL, 0xb9b6a80cUL, 0x5c8f82bcUL, 0x89d36b45UL,
+0x3a609437UL, 0xec00c9a9UL, 0x44715253UL, 0x0a874b49UL, 0xd773bc40UL, 0x7c34671cUL, 0x02717ef6UL, 0x4feb5536UL,
+0xa2d02fffUL, 0xd2bf60c4UL, 0xd43f03c0UL, 0x50b4ef6dUL, 0x07478cd1UL, 0x006e1888UL, 0xa2e53f55UL, 0xb9e6d4bcUL,
+0xa2048016UL, 0x97573833UL, 0xd7207d67UL, 0xde0f8f3dUL, 0x72f87b33UL, 0xabcc4f33UL, 0x7688c55dUL, 0x7b00a6b0UL,
+0x947b0001UL, 0x570075d2UL, 0xf9bb88f8UL, 0x8942019eUL, 0x4264a5ffUL, 0x856302e0UL, 0x72dbd92bUL, 0xee971b69UL,
+0x6ea22fdeUL, 0x5f08ae2bUL, 0xaf7a616dUL, 0xe5c98767UL, 0xcf1febd2UL, 0x61efc8c2UL, 0xf1ac2571UL, 0xcc8239c2UL,
+0x67214cb8UL, 0xb1e583d1UL, 0xb7dc3e62UL, 0x7f10bdceUL, 0xf90a5c38UL, 0x0ff0443dUL, 0x606e6dc6UL, 0x60543a49UL,
+0x5727c148UL, 0x2be98a1dUL, 0x8ab41738UL, 0x20e1be24UL, 0xaf96da0fUL, 0x68458425UL, 0x99833be5UL, 0x600d457dUL,
+0x282f9350UL, 0x8334b362UL, 0xd91d1120UL, 0x2b6d8da0UL, 0x642b1e31UL, 0x9c305a00UL, 0x52bce688UL, 0x1b03588aUL,
+0xf7baefd5UL, 0x4142ed9cUL, 0xa4315c11UL, 0x83323ec5UL, 0xdfef4636UL, 0xa133c501UL, 0xe9d3531cUL, 0xee353783UL};
+
+uint32_t s4[] PROGMEM = {
+0x9db30420UL, 0x1fb6e9deUL, 0xa7be7befUL, 0xd273a298UL, 0x4a4f7bdbUL, 0x64ad8c57UL, 0x85510443UL, 0xfa020ed1UL,
+0x7e287affUL, 0xe60fb663UL, 0x095f35a1UL, 0x79ebf120UL, 0xfd059d43UL, 0x6497b7b1UL, 0xf3641f63UL, 0x241e4adfUL,
+0x28147f5fUL, 0x4fa2b8cdUL, 0xc9430040UL, 0x0cc32220UL, 0xfdd30b30UL, 0xc0a5374fUL, 0x1d2d00d9UL, 0x24147b15UL,
+0xee4d111aUL, 0x0fca5167UL, 0x71ff904cUL, 0x2d195ffeUL, 0x1a05645fUL, 0x0c13fefeUL, 0x081b08caUL, 0x05170121UL,
+0x80530100UL, 0xe83e5efeUL, 0xac9af4f8UL, 0x7fe72701UL, 0xd2b8ee5fUL, 0x06df4261UL, 0xbb9e9b8aUL, 0x7293ea25UL,
+0xce84ffdfUL, 0xf5718801UL, 0x3dd64b04UL, 0xa26f263bUL, 0x7ed48400UL, 0x547eebe6UL, 0x446d4ca0UL, 0x6cf3d6f5UL,
+0x2649abdfUL, 0xaea0c7f5UL, 0x36338cc1UL, 0x503f7e93UL, 0xd3772061UL, 0x11b638e1UL, 0x72500e03UL, 0xf80eb2bbUL,
+0xabe0502eUL, 0xec8d77deUL, 0x57971e81UL, 0xe14f6746UL, 0xc9335400UL, 0x6920318fUL, 0x081dbb99UL, 0xffc304a5UL,
+0x4d351805UL, 0x7f3d5ce3UL, 0xa6c866c6UL, 0x5d5bcca9UL, 0xdaec6feaUL, 0x9f926f91UL, 0x9f46222fUL, 0x3991467dUL,
+0xa5bf6d8eUL, 0x1143c44fUL, 0x43958302UL, 0xd0214eebUL, 0x022083b8UL, 0x3fb6180cUL, 0x18f8931eUL, 0x281658e6UL,
+0x26486e3eUL, 0x8bd78a70UL, 0x7477e4c1UL, 0xb506e07cUL, 0xf32d0a25UL, 0x79098b02UL, 0xe4eabb81UL, 0x28123b23UL,
+0x69dead38UL, 0x1574ca16UL, 0xdf871b62UL, 0x211c40b7UL, 0xa51a9ef9UL, 0x0014377bUL, 0x041e8ac8UL, 0x09114003UL,
+0xbd59e4d2UL, 0xe3d156d5UL, 0x4fe876d5UL, 0x2f91a340UL, 0x557be8deUL, 0x00eae4a7UL, 0x0ce5c2ecUL, 0x4db4bba6UL,
+0xe756bdffUL, 0xdd3369acUL, 0xec17b035UL, 0x06572327UL, 0x99afc8b0UL, 0x56c8c391UL, 0x6b65811cUL, 0x5e146119UL,
+0x6e85cb75UL, 0xbe07c002UL, 0xc2325577UL, 0x893ff4ecUL, 0x5bbfc92dUL, 0xd0ec3b25UL, 0xb7801ab7UL, 0x8d6d3b24UL,
+0x20c763efUL, 0xc366a5fcUL, 0x9c382880UL, 0x0ace3205UL, 0xaac9548aUL, 0xeca1d7c7UL, 0x041afa32UL, 0x1d16625aUL,
+0x6701902cUL, 0x9b757a54UL, 0x31d477f7UL, 0x9126b031UL, 0x36cc6fdbUL, 0xc70b8b46UL, 0xd9e66a48UL, 0x56e55a79UL,
+0x026a4cebUL, 0x52437effUL, 0x2f8f76b4UL, 0x0df980a5UL, 0x8674cde3UL, 0xedda04ebUL, 0x17a9be04UL, 0x2c18f4dfUL,
+0xb7747f9dUL, 0xab2af7b4UL, 0xefc34d20UL, 0x2e096b7cUL, 0x1741a254UL, 0xe5b6a035UL, 0x213d42f6UL, 0x2c1c7c26UL,
+0x61c2f50fUL, 0x6552daf9UL, 0xd2c231f8UL, 0x25130f69UL, 0xd8167fa2UL, 0x0418f2c8UL, 0x001a96a6UL, 0x0d1526abUL,
+0x63315c21UL, 0x5e0a72ecUL, 0x49bafefdUL, 0x187908d9UL, 0x8d0dbd86UL, 0x311170a7UL, 0x3e9b640cUL, 0xcc3e10d7UL,
+0xd5cad3b6UL, 0x0caec388UL, 0xf73001e1UL, 0x6c728affUL, 0x71eae2a1UL, 0x1f9af36eUL, 0xcfcbd12fUL, 0xc1de8417UL,
+0xac07be6bUL, 0xcb44a1d8UL, 0x8b9b0f56UL, 0x013988c3UL, 0xb1c52fcaUL, 0xb4be31cdUL, 0xd8782806UL, 0x12a3a4e2UL,
+0x6f7de532UL, 0x58fd7eb6UL, 0xd01ee900UL, 0x24adffc2UL, 0xf4990fc5UL, 0x9711aac5UL, 0x001d7b95UL, 0x82e5e7d2UL,
+0x109873f6UL, 0x00613096UL, 0xc32d9521UL, 0xada121ffUL, 0x29908415UL, 0x7fbb977fUL, 0xaf9eb3dbUL, 0x29c9ed2aUL,
+0x5ce2a465UL, 0xa730f32cUL, 0xd0aa3fe8UL, 0x8a5cc091UL, 0xd49e2ce7UL, 0x0ce454a9UL, 0xd60acd86UL, 0x015f1919UL,
+0x77079103UL, 0xdea03af6UL, 0x78a8565eUL, 0xdee356dfUL, 0x21f05cbeUL, 0x8b75e387UL, 0xb3c50651UL, 0xb8a5c3efUL,
+0xd8eeb6d2UL, 0xe523be77UL, 0xc2154529UL, 0x2f69efdfUL, 0xafe67afbUL, 0xf470c4b2UL, 0xf3e0eb5bUL, 0xd6cc9876UL,
+0x39e4460cUL, 0x1fda8538UL, 0x1987832fUL, 0xca007367UL, 0xa99144f8UL, 0x296b299eUL, 0x492fc295UL, 0x9266beabUL,
+0xb5676e69UL, 0x9bd3dddaUL, 0xdf7e052fUL, 0xdb25701cUL, 0x1b5e51eeUL, 0xf65324e6UL, 0x6afce36cUL, 0x0316cc04UL,
+0x8644213eUL, 0xb7dc59d0UL, 0x7965291fUL, 0xccd6fd43UL, 0x41823979UL, 0x932bcdf6UL, 0xb657c34dUL, 0x4edfd282UL,
+0x7ae5290cUL, 0x3cb9536bUL, 0x851e20feUL, 0x9833557eUL, 0x13ecf0b0UL, 0xd3ffb372UL, 0x3f85c5c1UL, 0x0aef7ed2UL};
+
+
+
+
+#endif /*CAST6_SBOXES_H_*/
diff --git a/cast6_sboxes.h b/cast6_sboxes.h
deleted file mode 100644
index 88bd81e..0000000
--- a/cast6_sboxes.h
+++ /dev/null
@@ -1,144 +0,0 @@
-#ifndef CAST6_SBOXES_H_
-#define CAST6_SBOXES_H_
-
-
-uint32_t s1[] PROGMEM = {
-0x30fb40d4UL, 0x9fa0ff0bUL, 0x6beccd2fUL, 0x3f258c7aUL, 0x1e213f2fUL, 0x9c004dd3UL, 0x6003e540UL, 0xcf9fc949UL,
-0xbfd4af27UL, 0x88bbbdb5UL, 0xe2034090UL, 0x98d09675UL, 0x6e63a0e0UL, 0x15c361d2UL, 0xc2e7661dUL, 0x22d4ff8eUL,
-0x28683b6fUL, 0xc07fd059UL, 0xff2379c8UL, 0x775f50e2UL, 0x43c340d3UL, 0xdf2f8656UL, 0x887ca41aUL, 0xa2d2bd2dUL,
-0xa1c9e0d6UL, 0x346c4819UL, 0x61b76d87UL, 0x22540f2fUL, 0x2abe32e1UL, 0xaa54166bUL, 0x22568e3aUL, 0xa2d341d0UL,
-0x66db40c8UL, 0xa784392fUL, 0x004dff2fUL, 0x2db9d2deUL, 0x97943facUL, 0x4a97c1d8UL, 0x527644b7UL, 0xb5f437a7UL,
-0xb82cbaefUL, 0xd751d159UL, 0x6ff7f0edUL, 0x5a097a1fUL, 0x827b68d0UL, 0x90ecf52eUL, 0x22b0c054UL, 0xbc8e5935UL,
-0x4b6d2f7fUL, 0x50bb64a2UL, 0xd2664910UL, 0xbee5812dUL, 0xb7332290UL, 0xe93b159fUL, 0xb48ee411UL, 0x4bff345dUL,
-0xfd45c240UL, 0xad31973fUL, 0xc4f6d02eUL, 0x55fc8165UL, 0xd5b1caadUL, 0xa1ac2daeUL, 0xa2d4b76dUL, 0xc19b0c50UL,
-0x882240f2UL, 0x0c6e4f38UL, 0xa4e4bfd7UL, 0x4f5ba272UL, 0x564c1d2fUL, 0xc59c5319UL, 0xb949e354UL, 0xb04669feUL,
-0xb1b6ab8aUL, 0xc71358ddUL, 0x6385c545UL, 0x110f935dUL, 0x57538ad5UL, 0x6a390493UL, 0xe63d37e0UL, 0x2a54f6b3UL,
-0x3a787d5fUL, 0x6276a0b5UL, 0x19a6fcdfUL, 0x7a42206aUL, 0x29f9d4d5UL, 0xf61b1891UL, 0xbb72275eUL, 0xaa508167UL,
-0x38901091UL, 0xc6b505ebUL, 0x84c7cb8cUL, 0x2ad75a0fUL, 0x874a1427UL, 0xa2d1936bUL, 0x2ad286afUL, 0xaa56d291UL,
-0xd7894360UL, 0x425c750dUL, 0x93b39e26UL, 0x187184c9UL, 0x6c00b32dUL, 0x73e2bb14UL, 0xa0bebc3cUL, 0x54623779UL,
-0x64459eabUL, 0x3f328b82UL, 0x7718cf82UL, 0x59a2cea6UL, 0x04ee002eUL, 0x89fe78e6UL, 0x3fab0950UL, 0x325ff6c2UL,
-0x81383f05UL, 0x6963c5c8UL, 0x76cb5ad6UL, 0xd49974c9UL, 0xca180dcfUL, 0x380782d5UL, 0xc7fa5cf6UL, 0x8ac31511UL,
-0x35e79e13UL, 0x47da91d0UL, 0xf40f9086UL, 0xa7e2419eUL, 0x31366241UL, 0x051ef495UL, 0xaa573b04UL, 0x4a805d8dUL,
-0x548300d0UL, 0x00322a3cUL, 0xbf64cddfUL, 0xba57a68eUL, 0x75c6372bUL, 0x50afd341UL, 0xa7c13275UL, 0x915a0bf5UL,
-0x6b54bfabUL, 0x2b0b1426UL, 0xab4cc9d7UL, 0x449ccd82UL, 0xf7fbf265UL, 0xab85c5f3UL, 0x1b55db94UL, 0xaad4e324UL,
-0xcfa4bd3fUL, 0x2deaa3e2UL, 0x9e204d02UL, 0xc8bd25acUL, 0xeadf55b3UL, 0xd5bd9e98UL, 0xe31231b2UL, 0x2ad5ad6cUL,
-0x954329deUL, 0xadbe4528UL, 0xd8710f69UL, 0xaa51c90fUL, 0xaa786bf6UL, 0x22513f1eUL, 0xaa51a79bUL, 0x2ad344ccUL,
-0x7b5a41f0UL, 0xd37cfbadUL, 0x1b069505UL, 0x41ece491UL, 0xb4c332e6UL, 0x032268d4UL, 0xc9600accUL, 0xce387e6dUL,
-0xbf6bb16cUL, 0x6a70fb78UL, 0x0d03d9c9UL, 0xd4df39deUL, 0xe01063daUL, 0x4736f464UL, 0x5ad328d8UL, 0xb347cc96UL,
-0x75bb0fc3UL, 0x98511bfbUL, 0x4ffbcc35UL, 0xb58bcf6aUL, 0xe11f0abcUL, 0xbfc5fe4aUL, 0xa70aec10UL, 0xac39570aUL,
-0x3f04442fUL, 0x6188b153UL, 0xe0397a2eUL, 0x5727cb79UL, 0x9ceb418fUL, 0x1cacd68dUL, 0x2ad37c96UL, 0x0175cb9dUL,
-0xc69dff09UL, 0xc75b65f0UL, 0xd9db40d8UL, 0xec0e7779UL, 0x4744ead4UL, 0xb11c3274UL, 0xdd24cb9eUL, 0x7e1c54bdUL,
-0xf01144f9UL, 0xd2240eb1UL, 0x9675b3fdUL, 0xa3ac3755UL, 0xd47c27afUL, 0x51c85f4dUL, 0x56907596UL, 0xa5bb15e6UL,
-0x580304f0UL, 0xca042cf1UL, 0x011a37eaUL, 0x8dbfaadbUL, 0x35ba3e4aUL, 0x3526ffa0UL, 0xc37b4d09UL, 0xbc306ed9UL,
-0x98a52666UL, 0x5648f725UL, 0xff5e569dUL, 0x0ced63d0UL, 0x7c63b2cfUL, 0x700b45e1UL, 0xd5ea50f1UL, 0x85a92872UL,
-0xaf1fbda7UL, 0xd4234870UL, 0xa7870bf3UL, 0x2d3b4d79UL, 0x42e04198UL, 0x0cd0ede7UL, 0x26470db8UL, 0xf881814cUL,
-0x474d6ad7UL, 0x7c0c5e5cUL, 0xd1231959UL, 0x381b7298UL, 0xf5d2f4dbUL, 0xab838653UL, 0x6e2f1e23UL, 0x83719c9eUL,
-0xbd91e046UL, 0x9a56456eUL, 0xdc39200cUL, 0x20c8c571UL, 0x962bda1cUL, 0xe1e696ffUL, 0xb141ab08UL, 0x7cca89b9UL,
-0x1a69e783UL, 0x02cc4843UL, 0xa2f7c579UL, 0x429ef47dUL, 0x427b169cUL, 0x5ac9f049UL, 0xdd8f0f00UL, 0x5c8165bfUL};
-
-uint32_t s2[] PROGMEM = {
-0x1f201094UL, 0xef0ba75bUL, 0x69e3cf7eUL, 0x393f4380UL, 0xfe61cf7aUL, 0xeec5207aUL, 0x55889c94UL, 0x72fc0651UL,
-0xada7ef79UL, 0x4e1d7235UL, 0xd55a63ceUL, 0xde0436baUL, 0x99c430efUL, 0x5f0c0794UL, 0x18dcdb7dUL, 0xa1d6eff3UL,
-0xa0b52f7bUL, 0x59e83605UL, 0xee15b094UL, 0xe9ffd909UL, 0xdc440086UL, 0xef944459UL, 0xba83ccb3UL, 0xe0c3cdfbUL,
-0xd1da4181UL, 0x3b092ab1UL, 0xf997f1c1UL, 0xa5e6cf7bUL, 0x01420ddbUL, 0xe4e7ef5bUL, 0x25a1ff41UL, 0xe180f806UL,
-0x1fc41080UL, 0x179bee7aUL, 0xd37ac6a9UL, 0xfe5830a4UL, 0x98de8b7fUL, 0x77e83f4eUL, 0x79929269UL, 0x24fa9f7bUL,
-0xe113c85bUL, 0xacc40083UL, 0xd7503525UL, 0xf7ea615fUL, 0x62143154UL, 0x0d554b63UL, 0x5d681121UL, 0xc866c359UL,
-0x3d63cf73UL, 0xcee234c0UL, 0xd4d87e87UL, 0x5c672b21UL, 0x071f6181UL, 0x39f7627fUL, 0x361e3084UL, 0xe4eb573bUL,
-0x602f64a4UL, 0xd63acd9cUL, 0x1bbc4635UL, 0x9e81032dUL, 0x2701f50cUL, 0x99847ab4UL, 0xa0e3df79UL, 0xba6cf38cUL,
-0x10843094UL, 0x2537a95eUL, 0xf46f6ffeUL, 0xa1ff3b1fUL, 0x208cfb6aUL, 0x8f458c74UL, 0xd9e0a227UL, 0x4ec73a34UL,
-0xfc884f69UL, 0x3e4de8dfUL, 0xef0e0088UL, 0x3559648dUL, 0x8a45388cUL, 0x1d804366UL, 0x721d9bfdUL, 0xa58684bbUL,
-0xe8256333UL, 0x844e8212UL, 0x128d8098UL, 0xfed33fb4UL, 0xce280ae1UL, 0x27e19ba5UL, 0xd5a6c252UL, 0xe49754bdUL,
-0xc5d655ddUL, 0xeb667064UL, 0x77840b4dUL, 0xa1b6a801UL, 0x84db26a9UL, 0xe0b56714UL, 0x21f043b7UL, 0xe5d05860UL,
-0x54f03084UL, 0x066ff472UL, 0xa31aa153UL, 0xdadc4755UL, 0xb5625dbfUL, 0x68561be6UL, 0x83ca6b94UL, 0x2d6ed23bUL,
-0xeccf01dbUL, 0xa6d3d0baUL, 0xb6803d5cUL, 0xaf77a709UL, 0x33b4a34cUL, 0x397bc8d6UL, 0x5ee22b95UL, 0x5f0e5304UL,
-0x81ed6f61UL, 0x20e74364UL, 0xb45e1378UL, 0xde18639bUL, 0x881ca122UL, 0xb96726d1UL, 0x8049a7e8UL, 0x22b7da7bUL,
-0x5e552d25UL, 0x5272d237UL, 0x79d2951cUL, 0xc60d894cUL, 0x488cb402UL, 0x1ba4fe5bUL, 0xa4b09f6bUL, 0x1ca815cfUL,
-0xa20c3005UL, 0x8871df63UL, 0xb9de2fcbUL, 0x0cc6c9e9UL, 0x0beeff53UL, 0xe3214517UL, 0xb4542835UL, 0x9f63293cUL,
-0xee41e729UL, 0x6e1d2d7cUL, 0x50045286UL, 0x1e6685f3UL, 0xf33401c6UL, 0x30a22c95UL, 0x31a70850UL, 0x60930f13UL,
-0x73f98417UL, 0xa1269859UL, 0xec645c44UL, 0x52c877a9UL, 0xcdff33a6UL, 0xa02b1741UL, 0x7cbad9a2UL, 0x2180036fUL,
-0x50d99c08UL, 0xcb3f4861UL, 0xc26bd765UL, 0x64a3f6abUL, 0x80342676UL, 0x25a75e7bUL, 0xe4e6d1fcUL, 0x20c710e6UL,
-0xcdf0b680UL, 0x17844d3bUL, 0x31eef84dUL, 0x7e0824e4UL, 0x2ccb49ebUL, 0x846a3baeUL, 0x8ff77888UL, 0xee5d60f6UL,
-0x7af75673UL, 0x2fdd5cdbUL, 0xa11631c1UL, 0x30f66f43UL, 0xb3faec54UL, 0x157fd7faUL, 0xef8579ccUL, 0xd152de58UL,
-0xdb2ffd5eUL, 0x8f32ce19UL, 0x306af97aUL, 0x02f03ef8UL, 0x99319ad5UL, 0xc242fa0fUL, 0xa7e3ebb0UL, 0xc68e4906UL,
-0xb8da230cUL, 0x80823028UL, 0xdcdef3c8UL, 0xd35fb171UL, 0x088a1bc8UL, 0xbec0c560UL, 0x61a3c9e8UL, 0xbca8f54dUL,
-0xc72feffaUL, 0x22822e99UL, 0x82c570b4UL, 0xd8d94e89UL, 0x8b1c34bcUL, 0x301e16e6UL, 0x273be979UL, 0xb0ffeaa6UL,
-0x61d9b8c6UL, 0x00b24869UL, 0xb7ffce3fUL, 0x08dc283bUL, 0x43daf65aUL, 0xf7e19798UL, 0x7619b72fUL, 0x8f1c9ba4UL,
-0xdc8637a0UL, 0x16a7d3b1UL, 0x9fc393b7UL, 0xa7136eebUL, 0xc6bcc63eUL, 0x1a513742UL, 0xef6828bcUL, 0x520365d6UL,
-0x2d6a77abUL, 0x3527ed4bUL, 0x821fd216UL, 0x095c6e2eUL, 0xdb92f2fbUL, 0x5eea29cbUL, 0x145892f5UL, 0x91584f7fUL,
-0x5483697bUL, 0x2667a8ccUL, 0x85196048UL, 0x8c4baceaUL, 0x833860d4UL, 0x0d23e0f9UL, 0x6c387e8aUL, 0x0ae6d249UL,
-0xb284600cUL, 0xd835731dUL, 0xdcb1c647UL, 0xac4c56eaUL, 0x3ebd81b3UL, 0x230eabb0UL, 0x6438bc87UL, 0xf0b5b1faUL,
-0x8f5ea2b3UL, 0xfc184642UL, 0x0a036b7aUL, 0x4fb089bdUL, 0x649da589UL, 0xa345415eUL, 0x5c038323UL, 0x3e5d3bb9UL,
-0x43d79572UL, 0x7e6dd07cUL, 0x06dfdf1eUL, 0x6c6cc4efUL, 0x7160a539UL, 0x73bfbe70UL, 0x83877605UL, 0x4523ecf1UL};
-
-uint32_t s3[] PROGMEM = {
-0x8defc240UL, 0x25fa5d9fUL, 0xeb903dbfUL, 0xe810c907UL, 0x47607fffUL, 0x369fe44bUL, 0x8c1fc644UL, 0xaececa90UL,
-0xbeb1f9bfUL, 0xeefbcaeaUL, 0xe8cf1950UL, 0x51df07aeUL, 0x920e8806UL, 0xf0ad0548UL, 0xe13c8d83UL, 0x927010d5UL,
-0x11107d9fUL, 0x07647db9UL, 0xb2e3e4d4UL, 0x3d4f285eUL, 0xb9afa820UL, 0xfade82e0UL, 0xa067268bUL, 0x8272792eUL,
-0x553fb2c0UL, 0x489ae22bUL, 0xd4ef9794UL, 0x125e3fbcUL, 0x21fffceeUL, 0x825b1bfdUL, 0x9255c5edUL, 0x1257a240UL,
-0x4e1a8302UL, 0xbae07fffUL, 0x528246e7UL, 0x8e57140eUL, 0x3373f7bfUL, 0x8c9f8188UL, 0xa6fc4ee8UL, 0xc982b5a5UL,
-0xa8c01db7UL, 0x579fc264UL, 0x67094f31UL, 0xf2bd3f5fUL, 0x40fff7c1UL, 0x1fb78dfcUL, 0x8e6bd2c1UL, 0x437be59bUL,
-0x99b03dbfUL, 0xb5dbc64bUL, 0x638dc0e6UL, 0x55819d99UL, 0xa197c81cUL, 0x4a012d6eUL, 0xc5884a28UL, 0xccc36f71UL,
-0xb843c213UL, 0x6c0743f1UL, 0x8309893cUL, 0x0feddd5fUL, 0x2f7fe850UL, 0xd7c07f7eUL, 0x02507fbfUL, 0x5afb9a04UL,
-0xa747d2d0UL, 0x1651192eUL, 0xaf70bf3eUL, 0x58c31380UL, 0x5f98302eUL, 0x727cc3c4UL, 0x0a0fb402UL, 0x0f7fef82UL,
-0x8c96fdadUL, 0x5d2c2aaeUL, 0x8ee99a49UL, 0x50da88b8UL, 0x8427f4a0UL, 0x1eac5790UL, 0x796fb449UL, 0x8252dc15UL,
-0xefbd7d9bUL, 0xa672597dUL, 0xada840d8UL, 0x45f54504UL, 0xfa5d7403UL, 0xe83ec305UL, 0x4f91751aUL, 0x925669c2UL,
-0x23efe941UL, 0xa903f12eUL, 0x60270df2UL, 0x0276e4b6UL, 0x94fd6574UL, 0x927985b2UL, 0x8276dbcbUL, 0x02778176UL,
-0xf8af918dUL, 0x4e48f79eUL, 0x8f616ddfUL, 0xe29d840eUL, 0x842f7d83UL, 0x340ce5c8UL, 0x96bbb682UL, 0x93b4b148UL,
-0xef303cabUL, 0x984faf28UL, 0x779faf9bUL, 0x92dc560dUL, 0x224d1e20UL, 0x8437aa88UL, 0x7d29dc96UL, 0x2756d3dcUL,
-0x8b907ceeUL, 0xb51fd240UL, 0xe7c07ce3UL, 0xe566b4a1UL, 0xc3e9615eUL, 0x3cf8209dUL, 0x6094d1e3UL, 0xcd9ca341UL,
-0x5c76460eUL, 0x00ea983bUL, 0xd4d67881UL, 0xfd47572cUL, 0xf76cedd9UL, 0xbda8229cUL, 0x127dadaaUL, 0x438a074eUL,
-0x1f97c090UL, 0x081bdb8aUL, 0x93a07ebeUL, 0xb938ca15UL, 0x97b03cffUL, 0x3dc2c0f8UL, 0x8d1ab2ecUL, 0x64380e51UL,
-0x68cc7bfbUL, 0xd90f2788UL, 0x12490181UL, 0x5de5ffd4UL, 0xdd7ef86aUL, 0x76a2e214UL, 0xb9a40368UL, 0x925d958fUL,
-0x4b39fffaUL, 0xba39aee9UL, 0xa4ffd30bUL, 0xfaf7933bUL, 0x6d498623UL, 0x193cbcfaUL, 0x27627545UL, 0x825cf47aUL,
-0x61bd8ba0UL, 0xd11e42d1UL, 0xcead04f4UL, 0x127ea392UL, 0x10428db7UL, 0x8272a972UL, 0x9270c4a8UL, 0x127de50bUL,
-0x285ba1c8UL, 0x3c62f44fUL, 0x35c0eaa5UL, 0xe805d231UL, 0x428929fbUL, 0xb4fcdf82UL, 0x4fb66a53UL, 0x0e7dc15bUL,
-0x1f081fabUL, 0x108618aeUL, 0xfcfd086dUL, 0xf9ff2889UL, 0x694bcc11UL, 0x236a5caeUL, 0x12deca4dUL, 0x2c3f8cc5UL,
-0xd2d02dfeUL, 0xf8ef5896UL, 0xe4cf52daUL, 0x95155b67UL, 0x494a488cUL, 0xb9b6a80cUL, 0x5c8f82bcUL, 0x89d36b45UL,
-0x3a609437UL, 0xec00c9a9UL, 0x44715253UL, 0x0a874b49UL, 0xd773bc40UL, 0x7c34671cUL, 0x02717ef6UL, 0x4feb5536UL,
-0xa2d02fffUL, 0xd2bf60c4UL, 0xd43f03c0UL, 0x50b4ef6dUL, 0x07478cd1UL, 0x006e1888UL, 0xa2e53f55UL, 0xb9e6d4bcUL,
-0xa2048016UL, 0x97573833UL, 0xd7207d67UL, 0xde0f8f3dUL, 0x72f87b33UL, 0xabcc4f33UL, 0x7688c55dUL, 0x7b00a6b0UL,
-0x947b0001UL, 0x570075d2UL, 0xf9bb88f8UL, 0x8942019eUL, 0x4264a5ffUL, 0x856302e0UL, 0x72dbd92bUL, 0xee971b69UL,
-0x6ea22fdeUL, 0x5f08ae2bUL, 0xaf7a616dUL, 0xe5c98767UL, 0xcf1febd2UL, 0x61efc8c2UL, 0xf1ac2571UL, 0xcc8239c2UL,
-0x67214cb8UL, 0xb1e583d1UL, 0xb7dc3e62UL, 0x7f10bdceUL, 0xf90a5c38UL, 0x0ff0443dUL, 0x606e6dc6UL, 0x60543a49UL,
-0x5727c148UL, 0x2be98a1dUL, 0x8ab41738UL, 0x20e1be24UL, 0xaf96da0fUL, 0x68458425UL, 0x99833be5UL, 0x600d457dUL,
-0x282f9350UL, 0x8334b362UL, 0xd91d1120UL, 0x2b6d8da0UL, 0x642b1e31UL, 0x9c305a00UL, 0x52bce688UL, 0x1b03588aUL,
-0xf7baefd5UL, 0x4142ed9cUL, 0xa4315c11UL, 0x83323ec5UL, 0xdfef4636UL, 0xa133c501UL, 0xe9d3531cUL, 0xee353783UL};
-
-uint32_t s4[] PROGMEM = {
-0x9db30420UL, 0x1fb6e9deUL, 0xa7be7befUL, 0xd273a298UL, 0x4a4f7bdbUL, 0x64ad8c57UL, 0x85510443UL, 0xfa020ed1UL,
-0x7e287affUL, 0xe60fb663UL, 0x095f35a1UL, 0x79ebf120UL, 0xfd059d43UL, 0x6497b7b1UL, 0xf3641f63UL, 0x241e4adfUL,
-0x28147f5fUL, 0x4fa2b8cdUL, 0xc9430040UL, 0x0cc32220UL, 0xfdd30b30UL, 0xc0a5374fUL, 0x1d2d00d9UL, 0x24147b15UL,
-0xee4d111aUL, 0x0fca5167UL, 0x71ff904cUL, 0x2d195ffeUL, 0x1a05645fUL, 0x0c13fefeUL, 0x081b08caUL, 0x05170121UL,
-0x80530100UL, 0xe83e5efeUL, 0xac9af4f8UL, 0x7fe72701UL, 0xd2b8ee5fUL, 0x06df4261UL, 0xbb9e9b8aUL, 0x7293ea25UL,
-0xce84ffdfUL, 0xf5718801UL, 0x3dd64b04UL, 0xa26f263bUL, 0x7ed48400UL, 0x547eebe6UL, 0x446d4ca0UL, 0x6cf3d6f5UL,
-0x2649abdfUL, 0xaea0c7f5UL, 0x36338cc1UL, 0x503f7e93UL, 0xd3772061UL, 0x11b638e1UL, 0x72500e03UL, 0xf80eb2bbUL,
-0xabe0502eUL, 0xec8d77deUL, 0x57971e81UL, 0xe14f6746UL, 0xc9335400UL, 0x6920318fUL, 0x081dbb99UL, 0xffc304a5UL,
-0x4d351805UL, 0x7f3d5ce3UL, 0xa6c866c6UL, 0x5d5bcca9UL, 0xdaec6feaUL, 0x9f926f91UL, 0x9f46222fUL, 0x3991467dUL,
-0xa5bf6d8eUL, 0x1143c44fUL, 0x43958302UL, 0xd0214eebUL, 0x022083b8UL, 0x3fb6180cUL, 0x18f8931eUL, 0x281658e6UL,
-0x26486e3eUL, 0x8bd78a70UL, 0x7477e4c1UL, 0xb506e07cUL, 0xf32d0a25UL, 0x79098b02UL, 0xe4eabb81UL, 0x28123b23UL,
-0x69dead38UL, 0x1574ca16UL, 0xdf871b62UL, 0x211c40b7UL, 0xa51a9ef9UL, 0x0014377bUL, 0x041e8ac8UL, 0x09114003UL,
-0xbd59e4d2UL, 0xe3d156d5UL, 0x4fe876d5UL, 0x2f91a340UL, 0x557be8deUL, 0x00eae4a7UL, 0x0ce5c2ecUL, 0x4db4bba6UL,
-0xe756bdffUL, 0xdd3369acUL, 0xec17b035UL, 0x06572327UL, 0x99afc8b0UL, 0x56c8c391UL, 0x6b65811cUL, 0x5e146119UL,
-0x6e85cb75UL, 0xbe07c002UL, 0xc2325577UL, 0x893ff4ecUL, 0x5bbfc92dUL, 0xd0ec3b25UL, 0xb7801ab7UL, 0x8d6d3b24UL,
-0x20c763efUL, 0xc366a5fcUL, 0x9c382880UL, 0x0ace3205UL, 0xaac9548aUL, 0xeca1d7c7UL, 0x041afa32UL, 0x1d16625aUL,
-0x6701902cUL, 0x9b757a54UL, 0x31d477f7UL, 0x9126b031UL, 0x36cc6fdbUL, 0xc70b8b46UL, 0xd9e66a48UL, 0x56e55a79UL,
-0x026a4cebUL, 0x52437effUL, 0x2f8f76b4UL, 0x0df980a5UL, 0x8674cde3UL, 0xedda04ebUL, 0x17a9be04UL, 0x2c18f4dfUL,
-0xb7747f9dUL, 0xab2af7b4UL, 0xefc34d20UL, 0x2e096b7cUL, 0x1741a254UL, 0xe5b6a035UL, 0x213d42f6UL, 0x2c1c7c26UL,
-0x61c2f50fUL, 0x6552daf9UL, 0xd2c231f8UL, 0x25130f69UL, 0xd8167fa2UL, 0x0418f2c8UL, 0x001a96a6UL, 0x0d1526abUL,
-0x63315c21UL, 0x5e0a72ecUL, 0x49bafefdUL, 0x187908d9UL, 0x8d0dbd86UL, 0x311170a7UL, 0x3e9b640cUL, 0xcc3e10d7UL,
-0xd5cad3b6UL, 0x0caec388UL, 0xf73001e1UL, 0x6c728affUL, 0x71eae2a1UL, 0x1f9af36eUL, 0xcfcbd12fUL, 0xc1de8417UL,
-0xac07be6bUL, 0xcb44a1d8UL, 0x8b9b0f56UL, 0x013988c3UL, 0xb1c52fcaUL, 0xb4be31cdUL, 0xd8782806UL, 0x12a3a4e2UL,
-0x6f7de532UL, 0x58fd7eb6UL, 0xd01ee900UL, 0x24adffc2UL, 0xf4990fc5UL, 0x9711aac5UL, 0x001d7b95UL, 0x82e5e7d2UL,
-0x109873f6UL, 0x00613096UL, 0xc32d9521UL, 0xada121ffUL, 0x29908415UL, 0x7fbb977fUL, 0xaf9eb3dbUL, 0x29c9ed2aUL,
-0x5ce2a465UL, 0xa730f32cUL, 0xd0aa3fe8UL, 0x8a5cc091UL, 0xd49e2ce7UL, 0x0ce454a9UL, 0xd60acd86UL, 0x015f1919UL,
-0x77079103UL, 0xdea03af6UL, 0x78a8565eUL, 0xdee356dfUL, 0x21f05cbeUL, 0x8b75e387UL, 0xb3c50651UL, 0xb8a5c3efUL,
-0xd8eeb6d2UL, 0xe523be77UL, 0xc2154529UL, 0x2f69efdfUL, 0xafe67afbUL, 0xf470c4b2UL, 0xf3e0eb5bUL, 0xd6cc9876UL,
-0x39e4460cUL, 0x1fda8538UL, 0x1987832fUL, 0xca007367UL, 0xa99144f8UL, 0x296b299eUL, 0x492fc295UL, 0x9266beabUL,
-0xb5676e69UL, 0x9bd3dddaUL, 0xdf7e052fUL, 0xdb25701cUL, 0x1b5e51eeUL, 0xf65324e6UL, 0x6afce36cUL, 0x0316cc04UL,
-0x8644213eUL, 0xb7dc59d0UL, 0x7965291fUL, 0xccd6fd43UL, 0x41823979UL, 0x932bcdf6UL, 0xb657c34dUL, 0x4edfd282UL,
-0x7ae5290cUL, 0x3cb9536bUL, 0x851e20feUL, 0x9833557eUL, 0x13ecf0b0UL, 0xd3ffb372UL, 0x3f85c5c1UL, 0x0aef7ed2UL};
-
-
-
-
-#endif /*CAST6_SBOXES_H_*/
diff --git a/des.c b/des.c
deleted file mode 100644
index 2600a22..0000000
--- a/des.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/* des.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file     des.c
- * \author   Daniel Otte
- * \email    daniel.otte@rub.de
- * \date     2007-06-16
- * \brief    DES and EDE-DES implementation
- * \license	 GPLv3 or later
- * 
- */
-#include "config.h"
-#include "debug.h"
-#include "cli.h"
-#include <stdint.h>
-#include <string.h>
-#include <util/delay.h>
-#include <avr/pgmspace.h>
-
-prog_uint8_t sbox[256]  = {
-  /* S-box 1 */
-  0xE4, 0xD1, 0x2F, 0xB8, 0x3A, 0x6C, 0x59, 0x07,
-  0x0F, 0x74, 0xE2, 0xD1, 0xA6, 0xCB, 0x95, 0x38,
-  0x41, 0xE8, 0xD6, 0x2B, 0xFC, 0x97, 0x3A, 0x50,
-  0xFC, 0x82, 0x49, 0x17, 0x5B, 0x3E, 0xA0, 0x6D,
-  /* S-box 2 */
-  0xF1, 0x8E, 0x6B, 0x34, 0x97, 0x2D, 0xC0, 0x5A,
-  0x3D, 0x47, 0xF2, 0x8E, 0xC0, 0x1A, 0x69, 0xB5,
-  0x0E, 0x7B, 0xA4, 0xD1, 0x58, 0xC6, 0x93, 0x2F,
-  0xD8, 0xA1, 0x3F, 0x42, 0xB6, 0x7C, 0x05, 0xE9,
-  /* S-box 3 */
-  0xA0, 0x9E, 0x63, 0xF5, 0x1D, 0xC7, 0xB4, 0x28,
-  0xD7, 0x09, 0x34, 0x6A, 0x28, 0x5E, 0xCB, 0xF1,
-  0xD6, 0x49, 0x8F, 0x30, 0xB1, 0x2C, 0x5A, 0xE7,
-  0x1A, 0xD0, 0x69, 0x87, 0x4F, 0xE3, 0xB5, 0x2C,
-  /* S-box 4 */
-  0x7D, 0xE3, 0x06, 0x9A, 0x12, 0x85, 0xBC, 0x4F,
-  0xD8, 0xB5, 0x6F, 0x03, 0x47, 0x2C, 0x1A, 0xE9,
-  0xA6, 0x90, 0xCB, 0x7D, 0xF1, 0x3E, 0x52, 0x84,
-  0x3F, 0x06, 0xA1, 0xD8, 0x94, 0x5B, 0xC7, 0x2E,
-  /* S-box 5 */
-  0x2C, 0x41, 0x7A, 0xB6, 0x85, 0x3F, 0xD0, 0xE9,
-  0xEB, 0x2C, 0x47, 0xD1, 0x50, 0xFA, 0x39, 0x86,
-  0x42, 0x1B, 0xAD, 0x78, 0xF9, 0xC5, 0x63, 0x0E,
-  0xB8, 0xC7, 0x1E, 0x2D, 0x6F, 0x09, 0xA4, 0x53,
-  /* S-box 6 */
-  0xC1, 0xAF, 0x92, 0x68, 0x0D, 0x34, 0xE7, 0x5B,
-  0xAF, 0x42, 0x7C, 0x95, 0x61, 0xDE, 0x0B, 0x38,
-  0x9E, 0xF5, 0x28, 0xC3, 0x70, 0x4A, 0x1D, 0xB6,
-  0x43, 0x2C, 0x95, 0xFA, 0xBE, 0x17, 0x60, 0x8D,
-  /* S-box 7 */
-  0x4B, 0x2E, 0xF0, 0x8D, 0x3C, 0x97, 0x5A, 0x61,
-  0xD0, 0xB7, 0x49, 0x1A, 0xE3, 0x5C, 0x2F, 0x86,
-  0x14, 0xBD, 0xC3, 0x7E, 0xAF, 0x68, 0x05, 0x92,
-  0x6B, 0xD8, 0x14, 0xA7, 0x95, 0x0F, 0xE2, 0x3C,
-  /* S-box 8 */
-  0xD2, 0x84, 0x6F, 0xB1, 0xA9, 0x3E, 0x50, 0xC7,
-  0x1F, 0xD8, 0xA3, 0x74, 0xC5, 0x6B, 0x0E, 0x92,
-  0x7B, 0x41, 0x9C, 0xE2, 0x06, 0xAD, 0xF3, 0x58,
-  0x21, 0xE7, 0x4A, 0x8D, 0xFC, 0x90, 0x35, 0x6B
-};
-
-prog_uint8_t e_permtab[] ={ 
-	 4,  6, 					/* 4 bytes in 6 bytes out*/
-	32,  1,  2,  3,  4,  5,
-	 4,  5,  6,  7,  8,  9,
-	 8,  9, 10, 11, 12, 13,
-	12, 13, 14, 15, 16, 17,
-	16, 17, 18, 19, 20, 21,
-	20, 21, 22, 23, 24, 25,
-	24, 25, 26, 27, 28, 29,
-	28, 29, 30, 31, 32,  1
-};
-
-prog_uint8_t p_permtab[] ={ 
-	 4,  4,						/* 32 bit -> 32 bit */
-	16,  7, 20, 21,
-	29, 12, 28, 17,
-	 1, 15, 23, 26,
-	 5, 18, 31, 10,
-	 2,  8, 24, 14,
-	32, 27,  3,  9,
-	19, 13, 30,  6,
-	22, 11,  4, 25
-};
-
-prog_uint8_t ip_permtab[] ={ 
-	 8,  8,						/* 64 bit -> 64 bit */
-	58, 50, 42, 34, 26, 18, 10, 2,
-	60, 52, 44, 36, 28, 20, 12, 4,
-	62, 54, 46, 38, 30, 22, 14, 6,
-	64, 56, 48, 40, 32, 24, 16, 8,
-	57, 49, 41, 33, 25, 17,  9, 1,
-	59, 51, 43, 35, 27, 19, 11, 3,
-	61, 53, 45, 37, 29, 21, 13, 5,
-	63, 55, 47, 39, 31, 23, 15, 7
-};
-
-prog_uint8_t inv_ip_permtab[] ={ 
-	 8, 8,						/* 64 bit -> 64 bit */
-	40, 8, 48, 16, 56, 24, 64, 32,
-	39, 7, 47, 15, 55, 23, 63, 31,
-	38, 6, 46, 14, 54, 22, 62, 30,
-	37, 5, 45, 13, 53, 21, 61, 29,
-	36, 4, 44, 12, 52, 20, 60, 28,
-	35, 3, 43, 11, 51, 19, 59, 27,
-	34, 2, 42, 10, 50, 18, 58, 26,
-	33, 1, 41,  9, 49, 17, 57, 25
-};
-
-prog_uint8_t pc1_permtab[] ={ 
-	 8,  7, 					/* 64 bit -> 56 bit*/
-	57, 49, 41, 33, 25, 17,  9,
-	 1, 58, 50, 42, 34, 26, 18,
-	10,  2, 59, 51, 43, 35, 27,
-	19, 11,  3, 60, 52, 44, 36,
-	63, 55, 47, 39, 31, 23, 15,
-	 7, 62, 54, 46, 38, 30, 22,
-	14,  6, 61, 53, 45, 37, 29,
-	21, 13,  5, 28, 20, 12,  4
-};
-
-prog_uint8_t pc2_permtab[] ={ 
-	 7,	 6, 					/* 56 bit -> 48 bit */
-	14, 17, 11, 24,  1,  5,
-	 3, 28, 15,  6, 21, 10,
-	23, 19, 12,  4, 26,  8,
-	16,  7, 27, 20, 13,  2,
-	41, 52, 31, 37, 47, 55,
-	30, 40, 51, 45, 33, 48,
-	44, 49, 39, 56, 34, 53,
-	46, 42, 50, 36, 29, 32
-};
-
-prog_uint8_t splitin6bitword_permtab[] = {
-	 8,  8, 					/* 64 bit -> 64 bit */
-	64, 64,  1,  6,  2,  3,  4,  5, 
-	64, 64,  7, 12,  8,  9, 10, 11, 
-	64, 64, 13, 18, 14, 15, 16, 17, 
-	64, 64, 19, 24, 20, 21, 22, 23, 
-	64, 64, 25, 30, 26, 27, 28, 29, 
-	64, 64, 31, 36, 32, 33, 34, 35, 
-	64, 64, 37, 42, 38, 39, 40, 41, 
-	64, 64, 43, 48, 44, 45, 46, 47 
-};
-
-prog_uint8_t shiftkey_permtab[] = {
-	 7,  7, 					/* 56 bit -> 56 bit */
-	 2,  3,  4,  5,  6,  7,  8,  9,
-	10, 11, 12, 13, 14, 15, 16, 17,
-	18, 19, 20, 21, 22, 23, 24, 25, 
-	26, 27, 28,  1, 
-	30, 31, 32, 33, 34, 35, 36, 37, 
-	38, 39, 40, 41, 42, 43, 44, 45, 
-	46, 47, 48, 49, 50, 51, 52, 53, 
-	54, 55, 56, 29
-};
-
-prog_uint8_t shiftkeyinv_permtab[] = {
-	 7,  7,
-	28,  1,  2,  3,  4,  5,  6,  7,
-	 8,  9, 10, 11, 12, 13, 14, 15,
-	16, 17, 18, 19, 20, 21, 22, 23,
-	24, 25, 26, 27,
-	56, 29, 30, 31, 32, 33, 34, 35, 
-	36, 37, 38, 39, 40, 41, 42, 43, 
-	44, 45, 46, 47, 48, 49, 50, 51, 
-	52, 53, 54, 55
-};
-
-/*
-1 0
-1 0
-2 1
-2 1
-2 1
-2 1
-2 1
-2 1
-----
-1 0
-2 1
-2 1
-2 1
-2 1
-2 1
-2 1
-1 0
-*/
-#define ROTTABLE      0x7EFC 
-#define ROTTABLE_INV  0x3F7E
-/******************************************************************************/
-
-void permute(prog_uint8_t *ptable, const uint8_t *in, uint8_t *out){
-	uint8_t ib, ob; /* in-bytes and out-bytes */
-	uint8_t byte, bit; /* counter for bit and byte */
-	ib = pgm_read_byte(&(ptable[0]));
-	ob = pgm_read_byte(&(ptable[1]));
-	ptable = &(ptable[2]);
-	for(byte=0; byte<ob; ++byte){
-		uint8_t x,t=0;
-		for(bit=0; bit<8; ++bit){
-			x=pgm_read_byte(&(ptable[byte*8+bit])) -1 ;
-				t<<=1;
-			if((in[x/8]) & (0x80>>(x%8)) ){
-				t|=0x01;
-			}
-		}
-		out[byte]=t;
-	}
-}
-
-/******************************************************************************/
-
-void changeendian32(uint32_t * a){
-	*a = (*a & 0x000000FF) << 24 |
-		 (*a & 0x0000FF00) <<  8 |
-		 (*a & 0x00FF0000) >>  8 |
-		 (*a & 0xFF000000) >> 24;
-}
-
-/******************************************************************************/
-static inline
-void shiftkey(uint8_t *key){
-	uint8_t k[7];
-	memcpy(k, key, 7);
-	permute((prog_uint8_t*)shiftkey_permtab, k, key);	
-}
-
-/******************************************************************************/
-static inline
-void shiftkey_inv(uint8_t *key){
-	uint8_t k[7];
-	memcpy(k, key, 7);
-	permute((prog_uint8_t*)shiftkeyinv_permtab, k, key);
-	
-}
-
-/******************************************************************************/
-static inline
-uint64_t splitin6bitwords(uint64_t a){
-	uint64_t ret=0;
-	a &= 0x0000ffffffffffffLL;
-	permute((prog_uint8_t*)splitin6bitword_permtab, (uint8_t*)&a, (uint8_t*)&ret);	
-	return ret;
-}
-
-/******************************************************************************/
-
-static inline
-uint8_t substitute(uint8_t a, prog_uint8_t * sbp){
-	uint8_t x;	
-	x = pgm_read_byte(&(sbp[a>>1]));
-	x = (a&1)?x&0x0F:x>>4;
-	return x;
-	
-}
-
-/******************************************************************************/
-
-uint32_t des_f(uint32_t r, uint8_t* kr){
-	uint8_t i;
-	uint32_t t=0,ret;
-	uint64_t data;
-	prog_uint8_t *sbp; /* sboxpointer */ 
-	permute((prog_uint8_t*)e_permtab, (uint8_t*)&r, (uint8_t*)&data);
-	for(i=0; i<7; ++i)
-		((uint8_t*)&data)[i] ^= kr[i];
-	
-	/* Sbox substitution */
-	data = splitin6bitwords(data);
-	sbp=(prog_uint8_t*)sbox;
-	for(i=0; i<8; ++i){
-		uint8_t x;
-		x = substitute(((uint8_t*)&data)[i], sbp);
-		t<<=4;
-		t |= x;
-		sbp += 32;
-	}
-	changeendian32(&t);
-		
-	permute((prog_uint8_t*)p_permtab,(uint8_t*)&t, (uint8_t*)&ret);
-
-	return ret;
-}
-
-/******************************************************************************/
-
-void des_enc(void* out, const void* in, const void* key){
-#define R *((uint32_t*)&(data[4]))
-#define L *((uint32_t*)&(data[0]))
-
-	uint8_t data[8],kr[6],k[7];
-	uint8_t i;
-	
-	permute((prog_uint8_t*)ip_permtab, (uint8_t*)in, data);
-	permute((prog_uint8_t*)pc1_permtab, (uint8_t*)key, k);
-	for(i=0; i<8; ++i){
-		shiftkey(k);
-		if(ROTTABLE&((1<<((i<<1)+0))) )
-			shiftkey(k);
-		permute((prog_uint8_t*)pc2_permtab, k, kr);
-		L ^= des_f(R, kr);
-		
-		shiftkey(k);
-		if(ROTTABLE&((1<<((i<<1)+1))) )
-			shiftkey(k);
-		permute((prog_uint8_t*)pc2_permtab, k, kr);
-		R ^= des_f(L, kr);
-
-	}
-	/* L <-> R*/
-	R ^= L;
-	L ^= R;
-	R ^= L;
-	
-	permute((prog_uint8_t*)inv_ip_permtab, data, (uint8_t*)out);
-}
-
-/******************************************************************************/
-
-void des_dec(void* out, const void* in, const uint8_t* key){
-#define R *((uint32_t*)&(data[4]))
-#define L *((uint32_t*)&(data[0]))
-
-	uint8_t data[8],kr[6],k[7];
-	int8_t i;
-	
-	permute((prog_uint8_t*)ip_permtab, (uint8_t*)in, data);
-	permute((prog_uint8_t*)pc1_permtab, (uint8_t*)key, k);
-	for(i=7; i>=0; --i){
-		
-		permute((prog_uint8_t*)pc2_permtab, k, kr);
-		L ^= des_f(R, kr);
-		shiftkey_inv(k);
-		if(ROTTABLE&((1<<((i<<1)+1))) ){
-			shiftkey_inv(k);
-		}
-
-		permute((prog_uint8_t*)pc2_permtab, k, kr);
-		R ^= des_f(L, kr);
-		shiftkey_inv(k);
-		if(ROTTABLE&((1<<((i<<1)+0))) ){
-			shiftkey_inv(k);
-		}
-
-	}
-	/* L <-> R*/
-	R ^= L;
-	L ^= R;
-	R ^= L;
-	
-	permute((prog_uint8_t*)inv_ip_permtab, data, (uint8_t*)out);
-}
-
-/******************************************************************************/
-
-void tdes_enc(void* out, void* in, const void* key){
-	des_enc(out,  in, (uint8_t*)key + 0);
-	des_dec(out, out, (uint8_t*)key + 8);
-	des_enc(out, out, (uint8_t*)key +16);
-}
-
-/******************************************************************************/
-
-void tdes_dec(void* out, void* in, const uint8_t* key){
-	des_dec(out,  in, (uint8_t*)key + 0);
-	des_enc(out, out, (uint8_t*)key + 8);
-	des_dec(out, out, (uint8_t*)key +16);
-}
-
-/******************************************************************************/
-
-
diff --git a/des.h b/des.h
deleted file mode 100644
index 082de13..0000000
--- a/des.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/* des.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	des.h
- * \author	Daniel Otte 
- * \date	2007-06-16
- * \brief 	des and tdes declarations
- * \license	GPLv3 or later
- * 
- */
-#ifndef DES_H_
-#define DES_H_
-
-/* the FIPS 46-3 (1999-10-25) name for triple DES is triple data encryption algorithm so TDEA.
- * Also we only implement the three key mode  */
-
-/** \def tdea_enc
- * \brief defining an alias for void tdes_enc(void* out, const void* in, const void* key)
- */
-
-/** \def tdea_dec
- * \brief defining an alias for void tdes_dec(void* out, const void* in, const void* key)
- */
-
-#define tdea_enc tdes_enc
-#define tdea_dec tdes_dec
-
-/** \fn void des_enc(void* out, const void* in, const void* key)
- * \brief encrypt a block with DES
- * 
- * This function encrypts a block of 64 bits (8 bytes) with the DES algorithm.
- * Key expansion is done automatically. The key is 64 bits long, but note that
- * only 56 bits are used (the LSB of each byte is droped). The input and output
- * blocks may overlap.
- * 
- * \param out pointer to the block (64 bit = 8 byte) where the ciphertext is written to
- * \param in  pointer to the block (64 bit = 8 byte) where the plaintext is read from
- * \param key pointer to the key (64 bit = 8 byte)
- */
-void des_enc(void* out, const void* in, const void* key);
-
-/** \fn void des_dec(void* out, const void* in, const void* key)
- * \brief decrypt a block with DES
- * 
- * This function decrypts a block of 64 bits (8 bytes) with the DES algorithm.
- * Key expansion is done automatically. The key is 64 bits long, but note that
- * only 56 bits are used (the LSB of each byte is droped). The input and output
- * blocks may overlap.
- * 
- * \param out pointer to the block (64 bit = 8 byte) where the plaintext is written to
- * \param in  pointer to the block (64 bit = 8 byte) where the ciphertext is read from
- * \param key pointer to the key (64 bit = 8 byte)
- */
-void des_dec(void* out, const void* in, const void* key);
-
-/** \fn void tdes_enc(void* out, const void* in, const void* key)
- * \brief encrypt a block with Tripple-DES
- * 
- * This function encrypts a block of 64 bits (8 bytes) with the Tripple-DES (EDE)
- * algorithm. Key expansion is done automatically. The key is 192 bits long, but
- * note that only 178 bits are used (the LSB of each byte is droped). The input
- * and output blocks may overlap.
- * 
- * \param out pointer to the block (64 bit = 8 byte) where the ciphertext is written to
- * \param in  pointer to the block (64 bit = 8 byte) where the plaintext is read from
- * \param key pointer to the key (192 bit = 24 byte)
- */
-void tdes_enc(void* out, const void* in, const void* key);
-
-/** \fn void tdes_dec(void* out, const void* in, const void* key)
- * \brief decrypt a block with Tripple-DES
- * 
- * This function decrypts a block of 64 bits (8 bytes) with the Tripple-DES (EDE)
- * algorithm. Key expansion is done automatically. The key is 192 bits long, but
- * note that only 178 bits are used (the LSB of each byte is droped). The input
- * and output blocks may overlap.
- * 
- * \param out pointer to the block (64 bit = 8 byte) where the plaintext is written to
- * \param in  pointer to the block (64 bit = 8 byte) where the ciphertext is read from
- * \param key pointer to the key (192 bit = 24 byte)
- */
- void tdes_dec(void* out, const void* in, const void* key);
-
-#endif /*DES_H_*/
diff --git a/des/des.c b/des/des.c
new file mode 100644
index 0000000..2600a22
--- /dev/null
+++ b/des/des.c
@@ -0,0 +1,390 @@
+/* des.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file     des.c
+ * \author   Daniel Otte
+ * \email    daniel.otte@rub.de
+ * \date     2007-06-16
+ * \brief    DES and EDE-DES implementation
+ * \license	 GPLv3 or later
+ * 
+ */
+#include "config.h"
+#include "debug.h"
+#include "cli.h"
+#include <stdint.h>
+#include <string.h>
+#include <util/delay.h>
+#include <avr/pgmspace.h>
+
+prog_uint8_t sbox[256]  = { /* 8 S-boxes of 32 bytes each; every byte packs two 4-bit entries (even index in the high nibble, odd index in the low nibble, see substitute()) */
+  /* S-box 1 */
+  0xE4, 0xD1, 0x2F, 0xB8, 0x3A, 0x6C, 0x59, 0x07,
+  0x0F, 0x74, 0xE2, 0xD1, 0xA6, 0xCB, 0x95, 0x38,
+  0x41, 0xE8, 0xD6, 0x2B, 0xFC, 0x97, 0x3A, 0x50,
+  0xFC, 0x82, 0x49, 0x17, 0x5B, 0x3E, 0xA0, 0x6D,
+  /* S-box 2 */
+  0xF1, 0x8E, 0x6B, 0x34, 0x97, 0x2D, 0xC0, 0x5A,
+  0x3D, 0x47, 0xF2, 0x8E, 0xC0, 0x1A, 0x69, 0xB5,
+  0x0E, 0x7B, 0xA4, 0xD1, 0x58, 0xC6, 0x93, 0x2F,
+  0xD8, 0xA1, 0x3F, 0x42, 0xB6, 0x7C, 0x05, 0xE9,
+  /* S-box 3 */
+  0xA0, 0x9E, 0x63, 0xF5, 0x1D, 0xC7, 0xB4, 0x28,
+  0xD7, 0x09, 0x34, 0x6A, 0x28, 0x5E, 0xCB, 0xF1,
+  0xD6, 0x49, 0x8F, 0x30, 0xB1, 0x2C, 0x5A, 0xE7,
+  0x1A, 0xD0, 0x69, 0x87, 0x4F, 0xE3, 0xB5, 0x2C,
+  /* S-box 4 */
+  0x7D, 0xE3, 0x06, 0x9A, 0x12, 0x85, 0xBC, 0x4F,
+  0xD8, 0xB5, 0x6F, 0x03, 0x47, 0x2C, 0x1A, 0xE9,
+  0xA6, 0x90, 0xCB, 0x7D, 0xF1, 0x3E, 0x52, 0x84,
+  0x3F, 0x06, 0xA1, 0xD8, 0x94, 0x5B, 0xC7, 0x2E,
+  /* S-box 5 */
+  0x2C, 0x41, 0x7A, 0xB6, 0x85, 0x3F, 0xD0, 0xE9,
+  0xEB, 0x2C, 0x47, 0xD1, 0x50, 0xFA, 0x39, 0x86,
+  0x42, 0x1B, 0xAD, 0x78, 0xF9, 0xC5, 0x63, 0x0E,
+  0xB8, 0xC7, 0x1E, 0x2D, 0x6F, 0x09, 0xA4, 0x53,
+  /* S-box 6 */
+  0xC1, 0xAF, 0x92, 0x68, 0x0D, 0x34, 0xE7, 0x5B,
+  0xAF, 0x42, 0x7C, 0x95, 0x61, 0xDE, 0x0B, 0x38,
+  0x9E, 0xF5, 0x28, 0xC3, 0x70, 0x4A, 0x1D, 0xB6,
+  0x43, 0x2C, 0x95, 0xFA, 0xBE, 0x17, 0x60, 0x8D,
+  /* S-box 7 */
+  0x4B, 0x2E, 0xF0, 0x8D, 0x3C, 0x97, 0x5A, 0x61,
+  0xD0, 0xB7, 0x49, 0x1A, 0xE3, 0x5C, 0x2F, 0x86,
+  0x14, 0xBD, 0xC3, 0x7E, 0xAF, 0x68, 0x05, 0x92,
+  0x6B, 0xD8, 0x14, 0xA7, 0x95, 0x0F, 0xE2, 0x3C,
+  /* S-box 8 */
+  0xD2, 0x84, 0x6F, 0xB1, 0xA9, 0x3E, 0x50, 0xC7,
+  0x1F, 0xD8, 0xA3, 0x74, 0xC5, 0x6B, 0x0E, 0x92,
+  0x7B, 0x41, 0x9C, 0xE2, 0x06, 0xAD, 0xF3, 0x58,
+  0x21, 0xE7, 0x4A, 0x8D, 0xFC, 0x90, 0x35, 0x6B
+};
+
+prog_uint8_t e_permtab[] ={ /* passed to permute() by des_f() to expand the 4-byte half block to 6 bytes */
+	 4,  6, 					/* header: 4 bytes in, 6 bytes out; the rest are 1-based source bit numbers */
+	32,  1,  2,  3,  4,  5,
+	 4,  5,  6,  7,  8,  9,
+	 8,  9, 10, 11, 12, 13,
+	12, 13, 14, 15, 16, 17,
+	16, 17, 18, 19, 20, 21,
+	20, 21, 22, 23, 24, 25,
+	24, 25, 26, 27, 28, 29,
+	28, 29, 30, 31, 32,  1
+};
+
+prog_uint8_t p_permtab[] ={ /* applied by des_f() to the combined S-box output */
+	 4,  4,						/* header: 4 bytes in, 4 bytes out (32 bit -> 32 bit) */
+	16,  7, 20, 21,
+	29, 12, 28, 17,
+	 1, 15, 23, 26,
+	 5, 18, 31, 10,
+	 2,  8, 24, 14,
+	32, 27,  3,  9,
+	19, 13, 30,  6,
+	22, 11,  4, 25
+};
+
+prog_uint8_t ip_permtab[] ={ /* first permutation des_enc()/des_dec() apply to the input block */
+	 8,  8,						/* header: 8 bytes in, 8 bytes out (64 bit -> 64 bit) */
+	58, 50, 42, 34, 26, 18, 10, 2,
+	60, 52, 44, 36, 28, 20, 12, 4,
+	62, 54, 46, 38, 30, 22, 14, 6,
+	64, 56, 48, 40, 32, 24, 16, 8,
+	57, 49, 41, 33, 25, 17,  9, 1,
+	59, 51, 43, 35, 27, 19, 11, 3,
+	61, 53, 45, 37, 29, 21, 13, 5,
+	63, 55, 47, 39, 31, 23, 15, 7
+};
+
+prog_uint8_t inv_ip_permtab[] ={ /* last permutation des_enc()/des_dec() apply before writing the output block */
+	 8, 8,						/* header: 8 bytes in, 8 bytes out (64 bit -> 64 bit) */
+	40, 8, 48, 16, 56, 24, 64, 32,
+	39, 7, 47, 15, 55, 23, 63, 31,
+	38, 6, 46, 14, 54, 22, 62, 30,
+	37, 5, 45, 13, 53, 21, 61, 29,
+	36, 4, 44, 12, 52, 20, 60, 28,
+	35, 3, 43, 11, 51, 19, 59, 27,
+	34, 2, 42, 10, 50, 18, 58, 26,
+	33, 1, 41,  9, 49, 17, 57, 25
+};
+
+prog_uint8_t pc1_permtab[] ={ /* turns the 8-byte user key into the 7-byte key state; bits 8, 16, ..., 64 are never selected */
+	 8,  7, 					/* header: 8 bytes in, 7 bytes out (64 bit -> 56 bit) */
+	57, 49, 41, 33, 25, 17,  9,
+	 1, 58, 50, 42, 34, 26, 18,
+	10,  2, 59, 51, 43, 35, 27,
+	19, 11,  3, 60, 52, 44, 36,
+	63, 55, 47, 39, 31, 23, 15,
+	 7, 62, 54, 46, 38, 30, 22,
+	14,  6, 61, 53, 45, 37, 29,
+	21, 13,  5, 28, 20, 12,  4
+};
+
+prog_uint8_t pc2_permtab[] ={ /* selects the 6-byte round key from the 7-byte key state */
+	 7,	 6, 					/* header: 7 bytes in, 6 bytes out (56 bit -> 48 bit) */
+	14, 17, 11, 24,  1,  5,
+	 3, 28, 15,  6, 21, 10,
+	23, 19, 12,  4, 26,  8,
+	16,  7, 27, 20, 13,  2,
+	41, 52, 31, 37, 47, 55,
+	30, 40, 51, 45, 33, 48,
+	44, 49, 39, 56, 34, 53,
+	46, 42, 50, 36, 29, 32
+};
+
+prog_uint8_t splitin6bitword_permtab[] = { /* one output byte per 6-bit group, reordered as bit 1, bit 6, bits 2..5 of the group */
+	 8,  8, 					/* header: 8 bytes in, 8 bytes out; entry 64 fills the top two bits of each byte (presumably always zero after the mask in splitin6bitwords() - confirm for the target byte order) */
+	64, 64,  1,  6,  2,  3,  4,  5, 
+	64, 64,  7, 12,  8,  9, 10, 11, 
+	64, 64, 13, 18, 14, 15, 16, 17, 
+	64, 64, 19, 24, 20, 21, 22, 23, 
+	64, 64, 25, 30, 26, 27, 28, 29, 
+	64, 64, 31, 36, 32, 33, 34, 35, 
+	64, 64, 37, 42, 38, 39, 40, 41, 
+	64, 64, 43, 48, 44, 45, 46, 47 
+};
+
+prog_uint8_t shiftkey_permtab[] = { /* rotates bits 1..28 and bits 29..56 left by one position each */
+	 7,  7, 					/* header: 7 bytes in, 7 bytes out (56 bit -> 56 bit) */
+	 2,  3,  4,  5,  6,  7,  8,  9,
+	10, 11, 12, 13, 14, 15, 16, 17,
+	18, 19, 20, 21, 22, 23, 24, 25, 
+	26, 27, 28,  1, 
+	30, 31, 32, 33, 34, 35, 36, 37, 
+	38, 39, 40, 41, 42, 43, 44, 45, 
+	46, 47, 48, 49, 50, 51, 52, 53, 
+	54, 55, 56, 29
+};
+
+prog_uint8_t shiftkeyinv_permtab[] = { /* inverse of shiftkey_permtab: rotates bits 1..28 and bits 29..56 right by one position each */
+	 7,  7,						/* header: 7 bytes in, 7 bytes out (56 bit -> 56 bit) */
+	28,  1,  2,  3,  4,  5,  6,  7,
+	 8,  9, 10, 11, 12, 13, 14, 15,
+	16, 17, 18, 19, 20, 21, 22, 23,
+	24, 25, 26, 27,
+	56, 29, 30, 31, 32, 33, 34, 35, 
+	36, 37, 38, 39, 40, 41, 42, 43, 
+	44, 45, 46, 47, 48, 49, 50, 51, 
+	52, 53, 54, 55
+};
+
+/* Key rotations per round, listed as "<positions> <ROTTABLE bit>"; round n+1 uses bit n, counted from the LSB.
+1 0
+1 0
+2 1
+2 1
+2 1
+2 1
+2 1
+2 1
+----
+1 0
+2 1
+2 1
+2 1
+2 1
+2 1
+2 1
+1 0
+*/
+#define ROTTABLE      0x7EFC /* a set bit triggers a second shiftkey()/shiftkey_inv() call for that round */
+#define ROTTABLE_INV  0x3F7E /* ROTTABLE with its 16 bits in reverse order; not referenced in this file */
+/******************************************************************************/
+
+/* Bit permutation driven by a flash table: ptable[1] gives the output length in bytes and is
+ * followed by 1-based, MSB-first source bit numbers. in and out must not overlap. */
+void permute(prog_uint8_t *ptable, const uint8_t *in, uint8_t *out){
+	uint8_t ob, byte, bit; /* out-bytes; counters for byte and bit */
+	ob = pgm_read_byte(&(ptable[1])); /* ptable[0] (in-bytes) is not needed here */
+	ptable = &(ptable[2]);
+	for(byte=0; byte<ob; ++byte){
+		uint8_t x,t=0;
+		for(bit=0; bit<8; ++bit){
+			x=pgm_read_byte(&(ptable[byte*8+bit])) -1 ;
+			t<<=1;
+			if((in[x/8]) & (0x80>>(x%8)) ){
+				t|=0x01;
+			}
+		}
+		out[byte]=t;
+	}
+}
+
+/******************************************************************************/
+
+void changeendian32(uint32_t * a){ /* reverse the order of the four bytes of *a in place */
+	uint32_t v = *a;
+	v = (v << 24) | ((v & 0x0000FF00) << 8) |
+	    ((v & 0x00FF0000) >> 8) | (v >> 24);
+	*a = v;
+}
+
+/******************************************************************************/
+static inline /* advance the 7-byte key state one step using shiftkey_permtab, in place */
+void shiftkey(uint8_t *key){
+	uint8_t snapshot[7]; /* permute() reads its input while writing its output, so work from a copy */
+	memcpy(snapshot, key, sizeof snapshot);
+	permute((prog_uint8_t*)shiftkey_permtab, snapshot, key);
+}
+
+/******************************************************************************/
+/* Step the 7-byte key state back by one position using shiftkeyinv_permtab, in place. */
+static inline
+void shiftkey_inv(uint8_t *key){
+	uint8_t snapshot[7]; /* permute() reads its input while writing its output, so work from a copy */
+	memcpy(snapshot, key, sizeof snapshot);
+	permute((prog_uint8_t*)shiftkeyinv_permtab, snapshot, key);
+}
+
+/******************************************************************************/
+static inline /* spread the 48 bits kept by the mask over eight bytes, six bits per byte */
+uint64_t splitin6bitwords(uint64_t a){
+	uint64_t masked = a & 0x0000ffffffffffffLL; /* drop the upper 16 bits before permuting */
+	uint64_t spread = 0;
+	permute((prog_uint8_t*)splitin6bitword_permtab, (uint8_t*)&masked, (uint8_t*)&spread);
+	return spread;
+}
+
+/******************************************************************************/
+
+static inline /* fetch the 4-bit entry with index a from the nibble-packed flash table sbp */
+uint8_t substitute(uint8_t a, prog_uint8_t * sbp){
+	uint8_t packed = pgm_read_byte(&(sbp[a>>1])); /* two entries share one byte */
+	if(a&1){
+		return packed & 0x0F; /* odd index: low nibble */
+	}
+	return packed >> 4; /* even index: high nibble */
+}
+
+/******************************************************************************/
+
+/* DES round function. r: 32-bit half block as stored in the caller's data buffer;
+ * kr: 6-byte (48-bit) round key. Returns the 32-bit value produced by expansion,
+ * key mixing, S-box substitution and the final p_permtab permutation. */
+uint32_t des_f(uint32_t r, uint8_t* kr){
+	uint8_t i;
+	uint32_t t=0,ret;
+	uint64_t data=0; /* the expansion fills only 6 of the 8 bytes; keep the rest defined */
+	prog_uint8_t *sbp; /* sboxpointer */ 
+	permute((prog_uint8_t*)e_permtab, (uint8_t*)&r, (uint8_t*)&data);
+	for(i=0; i<6; ++i) /* round key is 6 bytes; the former bound of 7 read one byte past kr[5] */
+		((uint8_t*)&data)[i] ^= kr[i];
+	/* Sbox substitution */
+	data = splitin6bitwords(data);
+	sbp=(prog_uint8_t*)sbox;
+	for(i=0; i<8; ++i){
+		uint8_t x;
+		x = substitute(((uint8_t*)&data)[i], sbp);
+		t<<=4;
+		t |= x;
+		sbp += 32; /* each S-box occupies 32 bytes of the table */
+	}
+	changeendian32(&t); /* reverse the byte order before the byte-wise permutation below */
+	permute((prog_uint8_t*)p_permtab,(uint8_t*)&t, (uint8_t*)&ret);
+	return ret;
+}
+
+/******************************************************************************/
+
+/* Encrypt the 8-byte block at in into out using the 8-byte key at key.
+ * Round keys are derived on the fly; every loop pass performs two rounds. */
+void des_enc(void* out, const void* in, const void* key){
+#define R *((uint32_t*)&(data[4]))
+#define L *((uint32_t*)&(data[0]))
+	uint8_t data[8],kr[6],k[7];
+	uint8_t i;
+	permute((prog_uint8_t*)ip_permtab, (uint8_t*)in, data);
+	permute((prog_uint8_t*)pc1_permtab, (uint8_t*)key, k);
+	for(i=0; i<8; ++i){
+		shiftkey(k);
+		if((ROTTABLE>>((i<<1)+0))&1) /* shift the table, not 1: 1<<15 overflows a 16-bit int */
+			shiftkey(k);
+		permute((prog_uint8_t*)pc2_permtab, k, kr);
+		L ^= des_f(R, kr);
+		
+		shiftkey(k);
+		if((ROTTABLE>>((i<<1)+1))&1)
+			shiftkey(k);
+		permute((prog_uint8_t*)pc2_permtab, k, kr);
+		R ^= des_f(L, kr);
+
+	}
+	/* L <-> R*/
+	R ^= L;
+	L ^= R;
+	R ^= L;
+	
+	permute((prog_uint8_t*)inv_ip_permtab, data, (uint8_t*)out);
+}
+
+/******************************************************************************/
+
+/* Decrypt the 8-byte block at in into out using the 8-byte key at key. The key state
+ * starts from pc1_permtab and is stepped backwards with shiftkey_inv() after each round. */
+void des_dec(void* out, const void* in, const void* key){
+#define R *((uint32_t*)&(data[4]))
+#define L *((uint32_t*)&(data[0]))
+	uint8_t data[8],kr[6],k[7];
+	int8_t i;
+	permute((prog_uint8_t*)ip_permtab, (uint8_t*)in, data);
+	permute((prog_uint8_t*)pc1_permtab, (uint8_t*)key, k);
+	for(i=7; i>=0; --i){
+		
+		permute((prog_uint8_t*)pc2_permtab, k, kr);
+		L ^= des_f(R, kr);
+		shiftkey_inv(k);
+		if((ROTTABLE>>((i<<1)+1))&1){ /* shift the table, not 1: 1<<15 overflows a 16-bit int */
+			shiftkey_inv(k);
+		}
+
+		permute((prog_uint8_t*)pc2_permtab, k, kr);
+		R ^= des_f(L, kr);
+		shiftkey_inv(k);
+		if((ROTTABLE>>((i<<1)+0))&1){
+			shiftkey_inv(k);
+		}
+
+	}
+	/* L <-> R*/
+	R ^= L;
+	L ^= R;
+	R ^= L;
+	
+	permute((prog_uint8_t*)inv_ip_permtab, data, (uint8_t*)out);
+}
+
+/******************************************************************************/
+
+void tdes_enc(void* out, const void* in, const void* key){ /* encrypt-decrypt-encrypt with the 8-byte keys at key+0, key+8, key+16; in is const as declared in des.h */
+	des_enc(out,  in, (uint8_t*)key + 0);
+	des_dec(out, out, (uint8_t*)key + 8);
+	des_enc(out, out, (uint8_t*)key +16);
+}
+
+/******************************************************************************/
+
+void tdes_dec(void* out, const void* in, const void* key){ /* inverse of tdes_enc(): undo the three stages in reverse key order (key+16, key+8, key+0) */
+	des_dec(out,  in, (uint8_t*)key +16);
+	des_enc(out, out, (uint8_t*)key + 8);
+	des_dec(out, out, (uint8_t*)key + 0);
+}
+
+/******************************************************************************/
+
+
diff --git a/des/des.h b/des/des.h
new file mode 100644
index 0000000..082de13
--- /dev/null
+++ b/des/des.h
@@ -0,0 +1,100 @@
+/* des.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	des.h
+ * \author	Daniel Otte 
+ * \date	2007-06-16
+ * \brief 	des and tdes declarations
+ * \license	GPLv3 or later
+ * 
+ */
+#ifndef DES_H_
+#define DES_H_
+
+/* FIPS 46-3 (1999-10-25) calls triple DES the "Triple Data Encryption Algorithm" (TDEA),
+ * hence the tdea_* aliases below. Only the three-key mode is implemented. */
+
+/** \def tdea_enc
+ * \brief defining an alias for void tdes_enc(void* out, const void* in, const void* key)
+ */
+
+/** \def tdea_dec
+ * \brief defining an alias for void tdes_dec(void* out, const void* in, const void* key)
+ */
+
+#define tdea_enc tdes_enc
+#define tdea_dec tdes_dec
+
+/** \fn void des_enc(void* out, const void* in, const void* key)
+ * \brief encrypt a block with DES
+ * 
+ * This function encrypts a block of 64 bits (8 bytes) with the DES algorithm.
+ * Key expansion is done automatically. The key is 64 bits long, but note that
+ * only 56 bits are used (the LSB of each byte is dropped). The input and output
+ * blocks may overlap.
+ * 
+ * \param out pointer to the block (64 bit = 8 byte) where the ciphertext is written to
+ * \param in  pointer to the block (64 bit = 8 byte) where the plaintext is read from
+ * \param key pointer to the key (64 bit = 8 byte)
+ */
+void des_enc(void* out, const void* in, const void* key);
+
+/** \fn void des_dec(void* out, const void* in, const void* key)
+ * \brief decrypt a block with DES
+ * 
+ * This function decrypts a block of 64 bits (8 bytes) with the DES algorithm.
+ * Key expansion is done automatically. The key is 64 bits long, but note that
+ * only 56 bits are used (the LSB of each byte is dropped). The input and output
+ * blocks may overlap.
+ * 
+ * \param out pointer to the block (64 bit = 8 byte) where the plaintext is written to
+ * \param in  pointer to the block (64 bit = 8 byte) where the ciphertext is read from
+ * \param key pointer to the key (64 bit = 8 byte)
+ */
+void des_dec(void* out, const void* in, const void* key);
+
+/** \fn void tdes_enc(void* out, const void* in, const void* key)
+ * \brief encrypt a block with Triple-DES
+ * 
+ * This function encrypts a block of 64 bits (8 bytes) with the Triple-DES (EDE)
+ * algorithm. Key expansion is done automatically. The key is 192 bits long, but
+ * note that only 168 bits are used (the LSB of each byte is dropped). The input
+ * and output blocks may overlap.
+ * 
+ * \param out pointer to the block (64 bit = 8 byte) where the ciphertext is written to
+ * \param in  pointer to the block (64 bit = 8 byte) where the plaintext is read from
+ * \param key pointer to the key (192 bit = 24 byte)
+ */
+void tdes_enc(void* out, const void* in, const void* key);
+
+/** \fn void tdes_dec(void* out, const void* in, const void* key)
+ * \brief decrypt a block with Triple-DES
+ * 
+ * This function decrypts a block of 64 bits (8 bytes) with the Triple-DES (EDE)
+ * algorithm. Key expansion is done automatically. The key is 192 bits long, but
+ * note that only 168 bits are used (the LSB of each byte is dropped). The input
+ * and output blocks may overlap.
+ * 
+ * \param out pointer to the block (64 bit = 8 byte) where the plaintext is written to
+ * \param in  pointer to the block (64 bit = 8 byte) where the ciphertext is read from
+ * \param key pointer to the key (192 bit = 24 byte)
+ */
+ void tdes_dec(void* out, const void* in, const void* key);
+
+#endif /*DES_H_*/
diff --git a/entropium.c b/entropium.c
deleted file mode 100644
index ed56607..0000000
--- a/entropium.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/* entropium.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file    entropium.c
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \date    2006-05-17
- * \license	GPLv3 or later
- * \brief	This file contains an implementaition of a pseudo-random-number generator.
- * 
- * Extension 1:
- * 	rndCore is expanded to 512 bits for more security.
- *
-  \verbatim
-                       ################################################################################################
-                       #                                                                                              #
-                       #         +---------------------------+                                                        #
-                       #         |                           |                                                        #
-                       #         V                           |                                                        #
-                       #      (concat)                       |                                                        #
-   +---------------+   #    o---------o             (xor)+---------+      o---------o        o----o     o---------o   #    +--------------+
-   | entropy Block | -----> | sha-256 | --(offset)-<     | rndCore | ---> | sha-256 | --+----| +1 |---> | sha-256 | -----> | random Block |
-   +---------------+   #    o---------o             (xor)+---------+      o---------o   |    o----o     o---------o   #    +--------------+
-                       #                                 (xor) (xor)                    |                             #
-                       #                                   ^     ^                      |                             #
-                       #                                    \   /                       |                             #
-                       #                                   (offset)---------------------+                             #
-                       #                                                                                              #
-                       ################################################################################################
-  \endverbatim
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "sha256.h"
-#include "entropium.h"
-
-/**
- * \brief secret entropy pool. 
- * This is the core of the random which is generated
- */
-uint32_t rndCore[16]; 
-
-/*************************************************************************/
-
-/* idea is: hash the message and add it via xor to rndCore
- *
- * length in bits 
- * 
- * we simply first "hash" rndCore, then entropy.
- */
-void entropium_addEntropy(unsigned length_b, const void* data){
-	sha256_ctx_t s;
-	static uint8_t offset=0; /* selects if higher or lower half gets updated */
-	sha256_init(&s);
-	sha256_nextBlock(&s, rndCore);
-	while (length_b>=512){
-		sha256_nextBlock(&s, data);
-		data = (uint8_t*)data+ 512/8;
-		length_b -= 512;	
-	}
-	sha256_lastBlock(&s, data, length_b);
-	uint8_t i;
-	for (i=0; i<8; ++i){
-		rndCore[i+offset] ^= s.h[i];
-	}
-	offset ^= 8; /* hehe */
-}
-
-/*************************************************************************/
-
-void entropium_getRandomBlock(void *b){
-	sha256_ctx_t s;
-	uint8_t offset=8;
-	
-	sha256_init(&s);
-	sha256_lastBlock(&s, rndCore, 512); /* remeber the byte order! */
-	uint8_t i;
-	for (i=0; i<8; ++i){
-		rndCore[i+offset] ^= s.h[i];
-	}
-	offset ^= 8; /* hehe */
-	memcpy(b, s.h, 32); /* back up first hash in b */
-	((uint8_t*)b)[*((uint8_t*)b)&31]++; 	/* the important increment step */
-	sha256_init(&s);
-	sha256_lastBlock(&s, b, 256);
-	memcpy(b, s.h, 32);
-}
-
-/*************************************************************************/
-
-uint8_t entropium_getRandomByte(void){
-	static uint8_t block[32];
-	static uint8_t i=32;
-	
-	if (i==32){
-		entropium_getRandomBlock((void*)block);
-		i=0;
-	}	
-	return block[i++];
-}
-
-void entropium_fillBlockRandom(void* block, unsigned length_B){
-	while(length_B>ENTROPIUM_RANDOMBLOCK_SIZE){
-		entropium_getRandomBlock(block);
-		block = (uint8_t*)block + ENTROPIUM_RANDOMBLOCK_SIZE;
-		length_B -= ENTROPIUM_RANDOMBLOCK_SIZE;
-	}
-	while(length_B){
-		*((uint8_t*)block) = entropium_getRandomByte();
-		block= (uint8_t*)block +1; --length_B;
-	}
-}
- 
- 
diff --git a/entropium.h b/entropium.h
deleted file mode 100644
index 303619d..0000000
--- a/entropium.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* entropium.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * File:		entropium.h
- * Author:		Daniel Otte
- * Date:		23.07.2006
- * License:		GPL
- * Description:	This file contains the declarations for the pseudo-random-number generator.
- **/
-
-/**
- * \file    entropium.h
- * \author  Daniel Otte
- * \date    23.07.2006
- * \license	GPLv3 or later
- * \brief	This file contains the declarations for the pseudo-random-number generator.
- **/
-
-
-#ifndef ENTROPIUM_H_
-#define ENTROPIUM_H_
-
-#include <stdint.h>
-/*
- * length in bits 
- */
-#define ENTROPIUM_RANDOMBLOCK_SIZE 32 /* bytes */
- 
-/** \fn void entropium_addEntropy(unsigned length_b, const void* data)
- * \brief add entropy to the prng
- * 
- * This function adds data to the internal entropy pool
- * \param length_b length of the data block in bits
- * \param data pointer to the data
- */
-void entropium_addEntropy(unsigned length_b, const void* data); 
-
-/** \fn void entropium_getRandomBlock(void* b)
- * \brief generate a fixed size block of random data 
- * 
- * This function writes 32 bytes of random extracted from the entropy pool
- * in the supplied buffer.
- * \param b buffer where the random data gets written
- */
-void entropium_getRandomBlock(void* b);
-
-/** \fn uint8_t entropium_getRandomByte(void)
- * \brief get a single byte of random data
- * 
- * This function utilizes a internal buffer which gets automatically filled
- * again. 
- * \return a byte of random data
- */ 
-uint8_t entropium_getRandomByte(void);
-
-/** \fn void entropium_fillBlockRandom(void* block, unsigned length_B)
- * \brief get a block of random data
- * 
- * This function writes random data extracted from the entropy pool in the 
- * supplied buffer. It shares a internal buffer with the 
- * entropium_getRandomByte() function.
- * \param block pointer to the buffer where the random data goes
- * \param length_B number of bytes to be written to the buffer
- */
-void entropium_fillBlockRandom(void* block, unsigned length_B);
-
-#endif /*PRNG_H_*/
diff --git a/entropium/entropium.c b/entropium/entropium.c
new file mode 100644
index 0000000..ed56607
--- /dev/null
+++ b/entropium/entropium.c
@@ -0,0 +1,131 @@
+/* entropium.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file    entropium.c
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date    2006-05-17
+ * \license	GPLv3 or later
+ * \brief	This file contains an implementation of a pseudo-random-number generator.
+ * 
+ * Extension 1:
+ * 	rndCore is expanded to 512 bits for more security.
+ *
+  \verbatim
+                       ################################################################################################
+                       #                                                                                              #
+                       #         +---------------------------+                                                        #
+                       #         |                           |                                                        #
+                       #         V                           |                                                        #
+                       #      (concat)                       |                                                        #
+   +---------------+   #    o---------o             (xor)+---------+      o---------o        o----o     o---------o   #    +--------------+
+   | entropy Block | -----> | sha-256 | --(offset)-<     | rndCore | ---> | sha-256 | --+----| +1 |---> | sha-256 | -----> | random Block |
+   +---------------+   #    o---------o             (xor)+---------+      o---------o   |    o----o     o---------o   #    +--------------+
+                       #                                 (xor) (xor)                    |                             #
+                       #                                   ^     ^                      |                             #
+                       #                                    \   /                       |                             #
+                       #                                   (offset)---------------------+                             #
+                       #                                                                                              #
+                       ################################################################################################
+  \endverbatim
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "sha256.h"
+#include "entropium.h"
+
+/**
+ * \brief secret entropy pool (16 x 32 bit = 512 bit).
+ * All random data handed out by this module is derived from this state.
+ */
+uint32_t rndCore[16]; 
+
+/*************************************************************************/
+
+/* Mix caller-supplied entropy into rndCore.
+ *
+ * length_b is the size of data in bits.
+ * 
+ * rndCore is hashed as the first block, followed by the entropy data.
+ */
+void entropium_addEntropy(unsigned length_b, const void* data){
+	static uint8_t half=0; /* word index (0 or 8) of the rndCore half updated by this call */
+	sha256_ctx_t ctx;
+	uint8_t k;
+	sha256_init(&ctx);
+	sha256_nextBlock(&ctx, rndCore);
+	for (; length_b>=512; length_b-=512){ /* feed every complete 512-bit block */
+		sha256_nextBlock(&ctx, data);
+		data = (const uint8_t*)data + 64;
+	}
+	sha256_lastBlock(&ctx, data, length_b); /* remaining bits (fewer than 512) */
+	/* fold the resulting state words into the selected half of the pool */
+	for (k=0; k<8; ++k){
+		rndCore[half+k] ^= ctx.h[k];
+	}
+	half ^= 8; /* the next call updates the other half */
+}
+
+/*************************************************************************/
+
+void entropium_getRandomBlock(void *b){
+	sha256_ctx_t s;
+	static uint8_t offset=8; /* static: the toggle below must survive between calls */
+	/* b receives 32 bytes derived from rndCore by two chained SHA-256 passes */
+	sha256_init(&s);
+	sha256_lastBlock(&s, rndCore, 512); /* remember the byte order! */
+	uint8_t i;
+	for (i=0; i<8; ++i){
+		rndCore[i+offset] ^= s.h[i]; /* feed the first hash back into one half of the pool */
+	}
+	offset ^= 8; /* alternate between the upper and the lower half of rndCore */
+	memcpy(b, s.h, 32); /* back up first hash in b */
+	((uint8_t*)b)[*((uint8_t*)b)&31]++; 	/* the important increment step: byte b[b[0] & 31] is bumped */
+	sha256_init(&s);
+	sha256_lastBlock(&s, b, 256); /* second pass over the modified 256-bit value */
+	memcpy(b, s.h, 32);
+}
+
+/*************************************************************************/
+
+uint8_t entropium_getRandomByte(void){
+	static uint8_t cache[32]; /* most recently generated random block */
+	static uint8_t used=32;   /* number of cache bytes already handed out */
+	/* the counter starts at 32 ("empty") so that the first call generates a block */
+	if (32==used){
+		entropium_getRandomBlock(cache);
+		used=0;
+	}
+	return cache[used++];
+}
+
+void entropium_fillBlockRandom(void* block, unsigned length_B){
+	uint8_t* out = (uint8_t*)block; /* write cursor into the caller's buffer */
+	/* while more than one block is left: generate directly into the buffer */
+	for (; length_B>ENTROPIUM_RANDOMBLOCK_SIZE; length_B-=ENTROPIUM_RANDOMBLOCK_SIZE){
+		entropium_getRandomBlock(out);
+		out += ENTROPIUM_RANDOMBLOCK_SIZE;
+	}
+	for (; length_B; --length_B){ /* the remaining 0..32 bytes come from the shared byte buffer */
+		*out++ = entropium_getRandomByte();
+	}
+}
+ 
+ 
diff --git a/entropium/entropium.h b/entropium/entropium.h
new file mode 100644
index 0000000..303619d
--- /dev/null
+++ b/entropium/entropium.h
@@ -0,0 +1,84 @@
+/* entropium.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * File:		entropium.h
+ * Author:		Daniel Otte
+ * Date:		23.07.2006
+ * License:		GPL
+ * Description:	This file contains the declarations for the pseudo-random-number generator.
+ **/
+
+/**
+ * \file    entropium.h
+ * \author  Daniel Otte
+ * \date    23.07.2006
+ * \license	GPLv3 or later
+ * \brief	This file contains the declarations for the pseudo-random-number generator.
+ **/
+
+
+#ifndef ENTROPIUM_H_
+#define ENTROPIUM_H_
+
+#include <stdint.h>
+/*
+ * size of one random block as written by entropium_getRandomBlock()
+ */
+#define ENTROPIUM_RANDOMBLOCK_SIZE 32 /* bytes */
+ 
+/** \fn void entropium_addEntropy(unsigned length_b, const void* data)
+ * \brief add entropy to the prng
+ * 
+ * This function adds data to the internal entropy pool
+ * \param length_b length of the data block in bits
+ * \param data pointer to the data
+ */
+void entropium_addEntropy(unsigned length_b, const void* data); 
+
+/** \fn void entropium_getRandomBlock(void* b)
+ * \brief generate a fixed size block of random data 
+ * 
+ * This function writes 32 bytes of random data extracted from the entropy pool
+ * in the supplied buffer.
+ * \param b buffer where the random data gets written
+ */
+void entropium_getRandomBlock(void* b);
+
+/** \fn uint8_t entropium_getRandomByte(void)
+ * \brief get a single byte of random data
+ * 
+ * This function utilizes an internal buffer which is refilled automatically
+ * when it runs empty.
+ * \return a byte of random data
+ */ 
+uint8_t entropium_getRandomByte(void);
+
+/** \fn void entropium_fillBlockRandom(void* block, unsigned length_B)
+ * \brief get a block of random data
+ * 
+ * This function writes random data extracted from the entropy pool to the 
+ * supplied buffer. It shares an internal buffer with the 
+ * entropium_getRandomByte() function.
+ * \param block pointer to the buffer where the random data goes
+ * \param length_B number of bytes to be written to the buffer
+ */
+void entropium_fillBlockRandom(void* block, unsigned length_B);
+
+#endif /*ENTROPIUM_H_*/
diff --git a/entropium/sha256-asm.S b/entropium/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/entropium/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler	
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 on the stack, then clear r1 (undone by postcall) */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1	/* r1 = 0; presumably because called C code expects the avr-gcc zero register -- confirm */
+.endm
+
+.macro postcall
+	pop r31	/* restore everything precall pushed, in exactly the reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1	/* r1 gets back the value it had before precall cleared it */
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X (r27:r26) across the two uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25	/* r24 = CR; presumably the character argument of uart_putc -- confirm */
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25	/* r24 = LF */
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* r25:r24 = X, the address of the data to dump */
+.if \length > 16
+	ldi r22, lo8(16)	/* r23:r22 = 16: dump one 16 byte chunk, then recurse for the rest */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16	/* advance X past the chunk just dumped */
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)	/* r23:r22 = remaining length (16 or less) */
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* debug helper: hexdump of length bytes starting at X (r27:r26); registers are saved by precall and restored by postcall */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; copies the eight 32-bit state words of a context to the destination (32 bytes),
+; reversing the byte order inside each word
+;  param1: the 16-bit destination pointer, given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure, given in r23,r22
+;  registers written: r0, r20, r21, X (r27:r26), Z (r31:r30)
+sha256_ctx2hash:
+	movw r26, r22	/* X = ctx */
+	movw r30, r24	/* Z = destination */
+	ldi r21, 8	/* outer counter: 8 words */
+	sbiw r26, 4	/* bias X by -4 so that the +8 below lands one past the current word */
+1:	
+	ldi r20, 4	/* inner counter: 4 bytes per word */
+	adiw r26, 8	/* X = one past the end of the word to copy */
+2:	
+		ld r0, -X	/* read the word from its last byte down to its first ... */
+		st Z+, r0	/* ... and write it in ascending order */
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8 	/* reserve 40 bytes on the stack for a sha256_ctx (8 words + 64 bit length) */
+	sbci r17, 0	
+	in r0, SREG
+	cli			/* SP must not be observed half-updated by an interrupt */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25		/* keep the destination pointer for sha256_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = SP+1 = first byte of ctx; 16-bit add (inc would not produce a carry) */
+	sbci r17, hi8(-1)
+	
+	movw r8, r18		/* backup of length (r11:r8) */
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr (r13:r12) */
+	
+	movw r24, r16
+	rcall sha256_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f			/* a non-zero upper byte means far more than 512 bits are left */
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02		/* r9 >= 2  <=>  (length & 0xffff) >= 512 */
+	brlo 4f
+2:	/* at least 512 bits left */
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	clr r19			/* r19 = 0, used as the carry-only operand below */
+	ldi r18, 64		/* msg-ptr += 64 bytes (one block) */
+	add r12, r18
+	adc r13, r19
+	ldi r18, 0x02		/* length -= 512 (0x0200; the lowest byte r8 is unaffected) */
+	sub r9, r18
+	sbc r10, r19
+	sbc r11, r19
+	rjmp 1b
+	
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8		/* remaining length, now below 512 bits */
+	rcall sha256_lastBlock
+	
+	pop r24			/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash	
+	
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 	/* release the ctx */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; the length is a 16-bit bit count; complete 64 byte blocks are hashed before the padding is built
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to the remaining message data
+;	given in r23,r22
+;  param3: an 16-bit integer specifing length of the remaining data in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02		/* length >= 512 ? then hash a complete block first */
+	brlo sha256_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* msg pointer += 64 bytes (= 512 bits) */
+	sbci r23, hi8(-64)
+	rjmp sha256_lastBlock	
+sha256_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG		/* r1 is only a temporary for SREG here */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack, so Z = start of the 64 byte buffer */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the length becomes bit 5 of the byte count */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+	
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18		/* r1 = copy counter, zero again after the loop */
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:	
+sha256_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0			/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X		/* partial last message byte */
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18			/* r18 = bytes used in the buffer including the stuffing byte */
+
+/* does the 8 byte length field still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* it does not fit: an additional block is needed */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1		/* r1 is zero here because the copy loop above has run */
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63		/* Z back to the start of the buffer (64 = 63 + 1) */
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* now we should subtract 512 from length */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0		/* propagate the borrow through the upper 6 length bytes */
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not neccessary ;-) */
+	/* Z still points to the begin of the block, r18 == 0 */
+
+sha256_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20		/* total length = state.length + bits of this block ... */
+	st -Z, r0		/* ... stored from the end of the block backwards */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1		/* r1 == 0: only the carry is added */
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8		/* Z back to the start of the block */
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; release the 64 byte buffer (63 + 1)
+	adiw r30,  1  ;
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1			/* restore the zero register */
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ 	adiw r26, 1 ; X++
+ 	ldi r20, 16
+sha256_nextBlock_wcpyloop: 	
+ 	ld r23, Z+
+ 	ld r22, Z+
+ 	ld r19, Z+
+ 	ld r18, Z+
+ 	st X+, r18
+ 	st X+, r19
+ 	st X+, r22	
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
+	} */
+	/* r25,r24,r23,r24 (r21,r20) are function values
+	   r19,r18,r17,r16 are the accumulator
+	   r15,r14,r13,rBck1 are backup1
+	   r11,r10,r9 ,r8  are xor accu   
+	   r1 is round counter 								*/
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:		 
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; substract 64 = 16*4
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1	
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store in ROTR(w[i-2], 17) xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2	
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	/* now let's store the shit */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31
+	pop r30
+	push r30
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:	
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+	
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; 
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X still points at a[7]+1*/
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)		
+	dec r27  /* X - (64*4 == 256) */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1	
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */	
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2 
+	rcall bitrotl		/* rotr(x,6) */ 
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3 
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates furteh 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1 
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = shr(a[0], 2) */
+	movw Func1, Bck3 
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+	
+	
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:	
+	/* update state */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3 
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */ 
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21
+	st Z+, r20	
+	clr r21
+sha256_nextBlock_fix_length:	
+	brcc sha256_nextBlock_epilog
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+	
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+	
+	pop r21
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4 
+	ret
+
+sha256_kv: ; round-key-vector stored in ProgMem 
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+	
+;###########################################################	
+
+.global sha256_init 
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 ; bytes to copy: 32 for h[0..7] plus 8 for the length field
+sha256_init_vloop:	
+	lpm r23, Z+ ; read one byte of the table from program memory
+	st X+, r23 ; and store it into the context
+	dec r22
+	brne sha256_init_vloop
+	ret
+	
+sha256_init_vector: ; each 32-bit value is written as low word, high word
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67 
+.word 0xF372, 0x3C6E 
+.word 0xF53A, 0xA54F 
+.word 0x527F, 0x510E 
+.word 0x688C, 0x9B05 
+.word 0xD9AB, 0x1F83 
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000 ; these last 8 zero bytes initialise the length field
+.word 0x0000, 0x0000
+
+;###########################################################	
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21 and r25..r22 (which hold the rotated word on return)
+rotl32:
+	cpi r20, 8
+	brlo bitrotl ; fewer than 8 positions left: finish bit by bit
+	mov r21, r25 ; otherwise rotate left by one whole byte with moves
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl: ; also entered directly via rcall; rotl32 only gets here with r20 < 8
+	clr r21 ; r21 collects the bits shifted out of r25
+	clc
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+	rol r22 ; shift chain r22 -> r23 -> r24 -> r25 -> r21 through carry
+	rol r23
+	rol r24
+	rol r25
+	rol r21
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21 ; put the shifted-out top bits back in at the bottom
+	ret
+	
+
+;###########################################################	
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21 and r25..r22 (which hold the rotated word on return)
+rotr32:
+	cpi r20, 8
+	brlo bitrotr ; fewer than 8 positions left: finish bit by bit
+	mov r21, r22 ; otherwise rotate right by one whole byte with moves
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr: ; also entered directly via rcall; rotr32 only gets here with r20 < 8
+	clr r21 ; r21 collects the bits shifted out of r22
+	clc
+bitrotr_loop:	
+	tst r20
+	breq fixrotr
+	ror r25 ; shift chain r25 -> r24 -> r23 -> r22 -> r21 through carry
+	ror r24
+	ror r23
+	ror r22
+	ror r21
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21 ; put the shifted-out low bits back in at the top
+	ret
+	
+	
+;###########################################################	
+	
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianness of a 32-bit word
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  modifies: r20, r21 (scratch); the byte-swapped word is left in r25..r22
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25 ; old most significant byte becomes the least significant
+	mov r23, r24
+	mov r24, r21 ; old r23
+	mov r25, r20 ; old r22
+	ret
+
diff --git a/entropium/sha256.h b/entropium/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/entropium/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];
+	uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final block (its length is given by length_b)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/grain.c b/grain.c
deleted file mode 100644
index 8d30d22..0000000
--- a/grain.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/* grain.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * author: Daniel Otte
- * email:  daniel.otte@rub.de
- * license: GPLv3 or later
- * 
- */
-
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "grain.h"
-
-
-#define GRAIN_REVERSEKEY
-
-/* s0, s1, s2, ..., s78, s79 */
-#define S(i) ((ctx->lfsr[9-((i)/8)])>>(7-((i)%8)))
-/* b0, b1, b2, ..., b78, b79 */
-#define B(i) ((ctx->nfsr[9-((i)/8)])>>(7-((i)%8)))
-#define _B(i) (((ctx->nfsr[9-((i)/8)])>>(7-((i)%8)))&1)
-
-
-uint8_t h_lut[4] PROGMEM = {0x4C, 0xB6, 0xD3, 0x26};
-
-#ifdef GRAIN_BADOPTIMISATION
-uint8_t g_lut[128] PROGMEM = {
-        0xF0, 0xA5, 0x0F, 0x5A, 0x0F, 0x5A, 0xF0, 0xA5, 0x0F, 0x5A, 0xF0, 0xA5, 0xF0, 0x5A, 0x0F, 0x0F, 
-        0xC3, 0x96, 0x3C, 0x69, 0x3C, 0x69, 0xC3, 0x96, 0x9C, 0xC9, 0x63, 0x36, 0x63, 0xC9, 0x9C, 0x9C, 
-        0x0F, 0x5A, 0x0F, 0x5A, 0xF0, 0xA5, 0xF0, 0x5A, 0xF0, 0xA5, 0xF0, 0xA5, 0x0F, 0xA5, 0x0F, 0xF0, 
-        0x3C, 0x69, 0x3C, 0x69, 0xC3, 0x96, 0xC3, 0x69, 0x63, 0x36, 0x63, 0x36, 0x9C, 0x36, 0x9C, 0x63, 
-        0x0F, 0xD2, 0xF0, 0x2D, 0xF0, 0x2D, 0x0F, 0xD2, 0xF0, 0x2D, 0x0F, 0xD2, 0x0F, 0x2D, 0xF0, 0x78, 
-        0x3C, 0xE1, 0xC3, 0x1E, 0xC3, 0x1E, 0x3C, 0xE1, 0x63, 0xBE, 0x9C, 0x41, 0x9C, 0xBE, 0x63, 0xEB, 
-        0x00, 0xDD, 0x00, 0xDD, 0xFF, 0x22, 0xFF, 0xDD, 0xFF, 0x22, 0xFF, 0x22, 0x00, 0x22, 0xF0, 0x87, 
-        0xF3, 0x2E, 0xF3, 0x2E, 0x0C, 0xD1, 0x0C, 0x2E, 0xAC, 0x71, 0xAC, 0x71, 0x53, 0x71, 0xA3, 0xD4  };
-#endif
-
-uint8_t grain_enc(grain_ctx_t* ctx){
-	uint8_t s80, s0, c1, c2;
-	uint8_t i;
-	/* clock the LFSR */
-	s0=S(0);
-	s80 =S(62) ^ S(51) ^ S(38) ^ S(23) ^ S(13) ^ s0;
-	s80 &= 1;
-	c1 = s80;
-	for(i=0; i<10; ++i){
-		c2 = (ctx->lfsr[i])>>7;
-		ctx->lfsr[i] = ((ctx->lfsr[i])<<1) | c1;
-		c1 = c2;
-	}
-	/* clock the NFSR */
-	uint8_t b80;
-/*	778 Byte in this variant / 617 clks enc_time */
-#ifndef GRAIN_BADOPTIMISATION
-    uint8_t a,b,d,e;
-	b80 = B(62) ^ B(60) ^ B(52) ^ B(45) ^ 
-	      B(37) ^ B(33) ^ B(28) ^ B(21) ^ 
-	      B(14) ^ B( 9) ^ B( 0) ^ s0;
-	b80 ^= (a = B(63) & B(60));
-	b80 ^= (b = B(37) & B(33));
-	b80 ^= B(15) & B( 9); // c 
-	b80 ^= (d = B(60) & B(52) & B(45));
-	b80 ^= (e = B(33) & B(28) & B(21));
-	b80 ^= B(63) & B(45) & B(28) & B(9); // f 
-	/* -- */
-	b80 ^= b & B(60) & B(52); // g 
-	b80 ^= a & B(21) & B(15); // h 
-	b80 ^= d & B(63) & B(37); // i 
-	b80 ^= e & B(15) & B( 9); // j 
-	b80 ^= e & B(52) & B(45) & B(37); // k
-#else
-	/* let's reorder the bits */
-	uint16_t x; 
-
-/*
-	x  = _B(21); x<<=1;
-	x |= _B(33); x<<=1;
-	x |= _B(9) ; x<<=1;
-	x |= _B(45); x<<=1;
-	x |= _B(52); x<<=1;
-	x |= _B(37); x<<=1;
-	x |= _B(60); x<<=1;
-	x |= _B(28); x<<=1;
-	x |= _B(15); x<<=1;
-	x |= _B(63);
-*/
-	x  = ((ctx->nfsr[8])&0x41)<<1; // B15 & B09
-	x |= ((ctx->nfsr[2])&0x09);    // B63 & B60 
-//	x |= ((ctx->nfsr[4])&0x04)<<4; // B45
-	x |= (((ctx->nfsr[5])&0x44) | 
-	      ((ctx->nfsr[3])&0x08) | 
-	      (((((ctx->nfsr[7])&0x04)<<3) |((ctx->nfsr[4])&0x04))<<2) )<<2; // B37 & B33
-//	x |= ((ctx->nfsr[3])&0x08)<<2; // B52
-	x |= ((ctx->nfsr[6])&0x08)>>1; // B28
-//	x |= ((ctx->nfsr[7])&0x04)<<7; // B21 
-
-
-	b80 = pgm_read_byte(g_lut+(x/8))>>(x%8);
-	b80 ^= s0 ^ B(62) ^ B(14) ^ B(0);
-#endif
-	c1 = b80 & 1;
-	for(i=0; i<10; ++i){
-		c2 = (ctx->nfsr[i])>>7;
-		ctx->nfsr[i] = ((ctx->nfsr[i])<<1) | c1;
-		c1 = c2;
-	}
-	/* now the h function */
-	uint8_t h;
-	i = (S(2)&1) | 
-	    ((S(24)&1) << 1) |
-	    ((S(45)&1) << 2) |
-	    ((S(63)&1) << 3) |
-	    ((B(62)&1) << 4);
-	
-	h = (pgm_read_byte(h_lut+(i/8)))>>(i%8);
-	
-	h ^= B(0) ^ B(1) ^ B(3) ^ B(9) ^ B(30) ^ B(42) ^ B(55);
-	return h&1;
-}
-
-#ifdef GRAIN_REVERSEKEY
-
-static
-uint8_t reverse_bits(uint8_t a){
-	uint8_t lut[16] = {
-		0x0, 0x8, 0x4, 0xC,   /* 0000 1000 0100 1100 */
-		0x2, 0xA, 0x6, 0xE,   /* 0010 1010 0110 1110 */
-		0x1, 0x9, 0x5, 0xD,   /* 0001 1001 0101 1101 */
-		0x3, 0xB, 0x7, 0xF }; /* 0011 1011 0111 1111 */
-	uint8_t x;
-	x = ((lut[a&0xf]) << 4) | lut[a>>4];
-	return x;
-}
-#else
-
-#define reverse_bits(a) (a)
-
-#endif
-
-void grain_init(const void* key, const void* iv, grain_ctx_t* ctx){
-	uint8_t i,t;
-	
-	/* load the 80bit key */
-	for(i=0; i<10; ++i){
-		ctx->nfsr[9-i] = reverse_bits(((uint8_t*)key)[i]);
-	}
-	/* load the 64bit iv */
-	for(i=0; i<8; ++i){
-		ctx->lfsr[9-i] = reverse_bits(((uint8_t*)iv)[i]);
-	}
-	/* set the other bits of iv to 1 */
-	ctx->lfsr[0] = ctx->lfsr[1] = 0xFF;
-	
-	/* run it 160 times */
-	for(i=0; i<160; ++i){
-		t = grain_enc(ctx);
-		(ctx->lfsr[0]) ^= t;
-		(ctx->nfsr[0]) ^= t;
-	}
-}
-
-
-
-
-
-
diff --git a/grain.h b/grain.h
deleted file mode 100644
index 2526fdc..0000000
--- a/grain.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* grain.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/** \file grain.h
- * \author  Daniel Otte
- * \email   daniel.otte@rub.de
- * \license GPLv3 or later
- * \brief implementation of the Grain streamcipher
- */
-
-#ifndef GRAIN_H_
-#define GRAIN_H_
-
-
-#include <stdint.h>
-
-typedef struct gain_ctx_st{
-	uint8_t lfsr[10];
-	uint8_t nfsr[10];
-} grain_ctx_t;
-
-
-uint8_t grain_enc(grain_ctx_t* ctx);
-void grain_init(const void* key, const void* iv, grain_ctx_t* ctx);
-
-#endif /*GRAIN_H_*/
diff --git a/grain/grain.c b/grain/grain.c
new file mode 100644
index 0000000..8d30d22
--- /dev/null
+++ b/grain/grain.c
@@ -0,0 +1,185 @@
+/* grain.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * 
+ * author: Daniel Otte
+ * email:  daniel.otte@rub.de
+ * license: GPLv3 or later
+ * 
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "grain.h"
+
+
+#define GRAIN_REVERSEKEY
+
+/* s0, s1, s2, ..., s78, s79 */
+#define S(i) ((ctx->lfsr[9-((i)/8)])>>(7-((i)%8)))
+/* b0, b1, b2, ..., b78, b79 */
+#define B(i) ((ctx->nfsr[9-((i)/8)])>>(7-((i)%8)))
+#define _B(i) (((ctx->nfsr[9-((i)/8)])>>(7-((i)%8)))&1)
+
+
+uint8_t h_lut[4] PROGMEM = {0x4C, 0xB6, 0xD3, 0x26};
+
+#ifdef GRAIN_BADOPTIMISATION
+uint8_t g_lut[128] PROGMEM = {
+        0xF0, 0xA5, 0x0F, 0x5A, 0x0F, 0x5A, 0xF0, 0xA5, 0x0F, 0x5A, 0xF0, 0xA5, 0xF0, 0x5A, 0x0F, 0x0F, 
+        0xC3, 0x96, 0x3C, 0x69, 0x3C, 0x69, 0xC3, 0x96, 0x9C, 0xC9, 0x63, 0x36, 0x63, 0xC9, 0x9C, 0x9C, 
+        0x0F, 0x5A, 0x0F, 0x5A, 0xF0, 0xA5, 0xF0, 0x5A, 0xF0, 0xA5, 0xF0, 0xA5, 0x0F, 0xA5, 0x0F, 0xF0, 
+        0x3C, 0x69, 0x3C, 0x69, 0xC3, 0x96, 0xC3, 0x69, 0x63, 0x36, 0x63, 0x36, 0x9C, 0x36, 0x9C, 0x63, 
+        0x0F, 0xD2, 0xF0, 0x2D, 0xF0, 0x2D, 0x0F, 0xD2, 0xF0, 0x2D, 0x0F, 0xD2, 0x0F, 0x2D, 0xF0, 0x78, 
+        0x3C, 0xE1, 0xC3, 0x1E, 0xC3, 0x1E, 0x3C, 0xE1, 0x63, 0xBE, 0x9C, 0x41, 0x9C, 0xBE, 0x63, 0xEB, 
+        0x00, 0xDD, 0x00, 0xDD, 0xFF, 0x22, 0xFF, 0xDD, 0xFF, 0x22, 0xFF, 0x22, 0x00, 0x22, 0xF0, 0x87, 
+        0xF3, 0x2E, 0xF3, 0x2E, 0x0C, 0xD1, 0x0C, 0x2E, 0xAC, 0x71, 0xAC, 0x71, 0x53, 0x71, 0xA3, 0xD4  };
+#endif
+
+uint8_t grain_enc(grain_ctx_t* ctx){ /* clocks both registers once; returns one output bit (0 or 1) */
+	uint8_t s80, s0, c1, c2;
+	uint8_t i;
+	/* clock the LFSR: the feedback bit s80 enters at bit 0 of lfsr[0] */
+	s0=S(0); /* S()/B() only shift: the wanted bit is bit 0, higher bits are unmasked */
+	s80 =S(62) ^ S(51) ^ S(38) ^ S(23) ^ S(13) ^ s0;
+	s80 &= 1; /* keep only the feedback bit */
+	c1 = s80;
+	for(i=0; i<10; ++i){ /* shift all 10 bytes left by one, carrying bit 7 into the next byte */
+		c2 = (ctx->lfsr[i])>>7;
+		ctx->lfsr[i] = ((ctx->lfsr[i])<<1) | c1;
+		c1 = c2;
+	}
+	/* clock the NFSR: compute its feedback bit b80 (which also takes in s0) */
+	uint8_t b80;
+/*	778 Byte in this variant / 617 clks enc_time */
+#ifndef GRAIN_BADOPTIMISATION
+    uint8_t a,b,d,e;
+	b80 = B(62) ^ B(60) ^ B(52) ^ B(45) ^ 
+	      B(37) ^ B(33) ^ B(28) ^ B(21) ^ 
+	      B(14) ^ B( 9) ^ B( 0) ^ s0;
+	b80 ^= (a = B(63) & B(60));
+	b80 ^= (b = B(37) & B(33));
+	b80 ^= B(15) & B( 9); // c 
+	b80 ^= (d = B(60) & B(52) & B(45));
+	b80 ^= (e = B(33) & B(28) & B(21));
+	b80 ^= B(63) & B(45) & B(28) & B(9); // f 
+	/* the remaining terms reuse the partial products a, b, d and e from above */
+	b80 ^= b & B(60) & B(52); // g 
+	b80 ^= a & B(21) & B(15); // h 
+	b80 ^= d & B(63) & B(37); // i 
+	b80 ^= e & B(15) & B( 9); // j 
+	b80 ^= e & B(52) & B(45) & B(37); // k
+#else
+	/* let's reorder the bits so that they form an index into the bit table g_lut */
+	uint16_t x; 
+
+/*
+	x  = _B(21); x<<=1;
+	x |= _B(33); x<<=1;
+	x |= _B(9) ; x<<=1;
+	x |= _B(45); x<<=1;
+	x |= _B(52); x<<=1;
+	x |= _B(37); x<<=1;
+	x |= _B(60); x<<=1;
+	x |= _B(28); x<<=1;
+	x |= _B(15); x<<=1;
+	x |= _B(63);
+*/
+	x  = ((ctx->nfsr[8])&0x41)<<1; // B15 & B09
+	x |= ((ctx->nfsr[2])&0x09);    // B63 & B60 
+//	x |= ((ctx->nfsr[4])&0x04)<<4; // B45
+	x |= (((ctx->nfsr[5])&0x44) | 
+	      ((ctx->nfsr[3])&0x08) | 
+	      (((((ctx->nfsr[7])&0x04)<<3) |((ctx->nfsr[4])&0x04))<<2) )<<2; // B37 & B33
+//	x |= ((ctx->nfsr[3])&0x08)<<2; // B52
+	x |= ((ctx->nfsr[6])&0x08)>>1; // B28
+//	x |= ((ctx->nfsr[7])&0x04)<<7; // B21 
+
+
+	b80 = pgm_read_byte(g_lut+(x/8))>>(x%8);
+	b80 ^= s0 ^ B(62) ^ B(14) ^ B(0);
+#endif
+	c1 = b80 & 1; /* only bit 0 of b80 is the feedback bit */
+	for(i=0; i<10; ++i){ /* same byte-wise left shift as for the LFSR */
+		c2 = (ctx->nfsr[i])>>7;
+		ctx->nfsr[i] = ((ctx->nfsr[i])<<1) | c1;
+		c1 = c2;
+	}
+	/* now the h function: five register bits form an index into the 32-bit table h_lut */
+	uint8_t h;
+	i = (S(2)&1) | 
+	    ((S(24)&1) << 1) |
+	    ((S(45)&1) << 2) |
+	    ((S(63)&1) << 3) |
+	    ((B(62)&1) << 4);
+	
+	h = (pgm_read_byte(h_lut+(i/8)))>>(i%8);
+	
+	h ^= B(0) ^ B(1) ^ B(3) ^ B(9) ^ B(30) ^ B(42) ^ B(55);
+	return h&1; /* strip the unmasked upper bits */
+}
+
+#ifdef GRAIN_REVERSEKEY
+
+/* Reverse the bit order of a byte (bit 0 <-> bit 7, bit 1 <-> bit 6, ...). */
+static uint8_t reverse_bits(uint8_t a){
+	/* lut[n] is n with its 4 bits mirrored; static const so it is not rebuilt on every call */
+	static const uint8_t lut[16] = {
+		0x0, 0x8, 0x4, 0xC,   /* 0000 1000 0100 1100 */
+		0x2, 0xA, 0x6, 0xE,   /* 0010 1010 0110 1110 */
+		0x1, 0x9, 0x5, 0xD,   /* 0001 1001 0101 1101 */
+		0x3, 0xB, 0x7, 0xF }; /* 0011 1011 0111 1111 */
+	/* mirror each nibble and swap the two nibbles */
+	return (uint8_t)((lut[a&0xf] << 4) | lut[a>>4]);
+}
+#else
+
+#define reverse_bits(a) (a)
+
+#endif
+
+void grain_init(const void* key, const void* iv, grain_ctx_t* ctx){ /* reads 10 key bytes and 8 iv bytes */
+	uint8_t i,t;
+	
+	/* load the 80-bit key into the NFSR, bytes in reverse order (bits too if GRAIN_REVERSEKEY) */
+	for(i=0; i<10; ++i){
+		ctx->nfsr[9-i] = reverse_bits(((uint8_t*)key)[i]);
+	}
+	/* load the 64-bit iv into lfsr[9]..lfsr[2] the same way */
+	for(i=0; i<8; ++i){
+		ctx->lfsr[9-i] = reverse_bits(((uint8_t*)iv)[i]);
+	}
+	/* set the two LFSR bytes not covered by the iv to all ones */
+	ctx->lfsr[0] = ctx->lfsr[1] = 0xFF;
+	
+	/* run it 160 times, XORing each output bit into bit 0 of both registers */
+	for(i=0; i<160; ++i){
+		t = grain_enc(ctx);
+		(ctx->lfsr[0]) ^= t;
+		(ctx->nfsr[0]) ^= t;
+	}
+}
+
+
+
+
+
+
diff --git a/grain/grain.h b/grain/grain.h
new file mode 100644
index 0000000..2526fdc
--- /dev/null
+++ b/grain/grain.h
@@ -0,0 +1,42 @@
+/* grain.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/** \file grain.h
+ * \author  Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \license GPLv3 or later
+ * \brief implementation of the Grain streamcipher
+ */
+
+#ifndef GRAIN_H_
+#define GRAIN_H_
+
+
+#include <stdint.h>
+
+typedef struct gain_ctx_st{ /* NOTE(review): tag is spelled "gain"; presumably "grain" was meant */
+	uint8_t lfsr[10]; /* 80-bit linear feedback shift register */
+	uint8_t nfsr[10]; /* 80-bit nonlinear feedback shift register */
+} grain_ctx_t;
+
+
+uint8_t grain_enc(grain_ctx_t* ctx); /* clock the cipher once, returns one output bit (0 or 1) */
+void grain_init(const void* key, const void* iv, grain_ctx_t* ctx); /* key: 10 bytes, iv: 8 bytes */
+
+#endif /*GRAIN_H_*/
diff --git a/hmac-md5.c b/hmac-md5.c
deleted file mode 100644
index d72dca9..0000000
--- a/hmac-md5.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* hmac-md5.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * implementation of HMAC as described in RFC2104
- * Author:      Daniel Otte
- * email:       daniel.otte@rub.de
- * License:     GPLv3 or later
- **/
-
-/* 
- * hmac = hash ( k^opad , hash( k^ipad  , msg))
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "config.h"
-#include "md5.h"
-#include "hmac-md5.h"
-
-#define IPAD 0x36
-#define OPAD 0x5C
-
-#ifndef HMAC_SHORTONLY
-
-void hmac_md5_init(hmac_md5_ctx_t *s, void* key, uint16_t keylength_b){
-	uint8_t buffer[MD5_BLOCK_BYTES];
-	uint8_t i;
-	
-	memset(buffer, 0, MD5_BLOCK_BYTES);
-	if (keylength_b > MD5_BLOCK_BITS){
-		md5((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<MD5_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	md5_init(&(s->a));
-	md5_nextBlock(&(s->a), buffer);
-	
-	for (i=0; i<MD5_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD^OPAD;
-	}
-	md5_init(&(s->b));
-	md5_nextBlock(&(s->b), buffer);
-	
-#if defined SECURE_WIPE_BUFFER
-	memset(buffer, 0, MD5_BLOCK_BYTES);
-#endif
-}
-
-void hmac_md5_nextBlock(hmac_md5_ctx_t *s, const void* block){
-	md5_nextBlock(&(s->a), block);
-}
-
-void hmac_md5_lastBlock(hmac_md5_ctx_t *s, const void* block, uint16_t length_b){
-	md5_lastBlock(&(s->a), block, length_b);
-}
-
-void hmac_md5_final(void* dest, hmac_md5_ctx_t *s){
-	md5_ctx2hash((md5_hash_t*)dest, &(s->a));
-	md5_lastBlock(&(s->b), dest, MD5_HASH_BITS);
-	md5_ctx2hash((md5_hash_t*)dest, &(s->b));
-}
-
-#endif
-
-/*
-void hmac_md5_nextBlock()
-void hmac_md5_lastBlock()
-*/
-
-/*
- * keylength in bits!
- * message length in bits!
- */
-void hmac_md5(void* dest, void* key, uint16_t keylength_b, void* msg, uint32_t msglength_b){ /* a one-shot*/
-	md5_ctx_t s;
-	uint8_t i;
-	uint8_t buffer[MD5_BLOCK_BYTES];
-	
-	memset(buffer, 0, MD5_BLOCK_BYTES);
-	
-	/* if key is larger than a block we have to hash it*/
-	if (keylength_b > MD5_BLOCK_BITS){
-		md5((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<MD5_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	md5_init(&s);
-	md5_nextBlock(&s, buffer);
-	while (msglength_b >= MD5_BLOCK_BITS){
-		md5_nextBlock(&s, msg);
-		msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
-		msglength_b -=  MD5_BLOCK_BITS;
-	}
-	md5_lastBlock(&s, msg, msglength_b);
-	/* since buffer still contains key xor ipad we can do ... */
-	for (i=0; i<MD5_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD ^ OPAD;
-	}
-	md5_ctx2hash(dest, &s); /* save inner hash temporary to dest */
-	md5_init(&s);
-	md5_nextBlock(&s, buffer);
-	md5_lastBlock(&s, dest, MD5_HASH_BITS);
-	md5_ctx2hash(dest, &s);
-}
-
diff --git a/hmac-md5.h b/hmac-md5.h
deleted file mode 100644
index 5bbaeb9..0000000
--- a/hmac-md5.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* hmac-md5.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef HMACMD5_H_
-#define HMACMD5_H_
-
-#include "md5.h"
-
-#define HMAC_MD5_BITS        MD5_HASH_BITS
-#define HMAC_MD5_BYTES       MD5_HASH_BYTES
-#define HMAC_MD5_BLOCK_BITS  MD5_BLOCK_BITS
-#define HMAC_MD5_BLOCK_BYTES MD5_BLOCK_BYTES
-
-typedef struct{
-	md5_ctx_t a,b;
-} hmac_md5_ctx_t;
-
-
-void hmac_md5_init(hmac_md5_ctx_t *s, void* key, uint16_t keylength_b);
-void hmac_md5_nextBlock(hmac_md5_ctx_t *s, const void* block);
-void hmac_md5_lastBlock(hmac_md5_ctx_t *s, const void* block, uint16_t length_b);
-void hmac_md5_final(void* dest, hmac_md5_ctx_t *s);
-
-void hmac_md5(void* dest, void* key, uint16_t keylength_b, void* msg, uint32_t msglength_b);
-
-
-#endif /*HMACMD5_H_*/
diff --git a/hmac-md5/base64_dec.c b/hmac-md5/base64_dec.c
new file mode 100644
index 0000000..f057f54
--- /dev/null
+++ b/hmac-md5/base64_dec.c
@@ -0,0 +1,246 @@
+/* base64_dec.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 decoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_dec.h"
+
+#include "test_src/cli.h"
+
+/*
+ #define USE_GCC_EXTENSION
+*/
+#if 1
+
+#ifdef USE_GCC_EXTENSION
+
+static
+int ascii2bit6(char a){
+	switch(a){
+		case 'A'...'Z':
+			return a-'A';
+		case 'a'...'z':
+			return a-'a'+26;
+		case '0'...'9':
+			return a-'0'+52;
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return -1;
+	}
+}
+
+#else
+
+static
+uint8_t ascii2bit6(char a){ /* map one base64/base64url character to its 6-bit value; 0xff = not in the alphabet */
+	int r;
+	switch(a>>4){ /* dispatch on the high nibble to find the candidate range quickly */
+		case 0x5:
+		case 0x4: 
+			r=a-'A';
+			if(r<0 || r>25){
+				break; /* not 'A'..'Z', but '_' (0x5f) lives in this nibble too: check it below */
+			} else {
+				return r;
+			}
+		case 0x7:
+		case 0x6: 
+			r=a-'a';
+			if(r<0 || r>25){
+				return 0xff;
+			} else {
+				return r+26;
+			}
+			break;
+		case 0x3:
+			if(a>'9')
+				return 0xff;
+			return a-'0'+52;
+		default:
+			break;	
+	}
+	switch (a){ /* the two non-alphanumeric symbols, in standard and URL-safe spelling */
+		case '+':
+		case '-':
+			return 62;
+		case '/':
+		case '_':
+			return 63;
+		default:
+			return 0xff;
+	}
+}
+
+#endif
+
+#else
+
+static 
+uint8_t ascii2bit6(uint8_t a){
+	if(a>='A' && a<='Z'){
+		return a-'A';
+	} else {
+		if(a>='a' && a<= 'z'){
+			return a-'a'+26;
+		} else {
+			if(a>='0' && a<='9'){
+				return a-'0'+52;
+			} else {
+				if(a=='+' || a=='-'){
+					return 62;
+				} else {
+					if(a=='/' || a=='_'){
+						return 63;
+					} else {
+						return 0xff;
+					}
+				}
+			}
+		}
+	}
+}
+
+#endif
+
+int base64_binlength(char* str, uint8_t strict){ /* returns the decoded size in bytes of the base64 text str, or -1 if it is malformed */
+	int l=0;        /* number of alphabet characters seen */
+	uint8_t term=0; /* number of '=' padding characters seen */
+	for(;;){
+		if(*str=='\0')
+			break;
+		if(*str=='\n' || *str=='\r'){ /* line breaks are always ignored */
+			str++;
+			continue;
+		}
+		if(*str=='='){
+			term++;
+			str++;
+			if(term==2){ /* at most two padding characters are meaningful */
+				break;
+			}
+			continue;
+		}
+		if(term)
+			return -1; /* data after padding */
+		if((uint8_t)ascii2bit6(*str)==0xFF){ /* cast: the result must compare equal for both the int and the uint8_t variant */
+			if(strict)
+				return -1; /* strict mode rejects foreign characters, otherwise they are skipped */
+		} else {
+			l++;
+		}
+		str++;
+	}
+	switch(term){ /* the padding count must match the size of the last group */
+		case 0:
+			if(l%4!=0)
+				return -1;
+			return l/4*3;
+		case 1:
+			if(l%4!=3)
+				return -1;
+			return (l+1)/4*3-1;
+		case 2:
+			if(l%4!=2)
+				return -1;
+			return (l+2)/4*3-2;
+		default:
+			return -1;
+	}
+}
+
+/*
+  |543210543210543210543210|
+  |765432107654321076543210|
+
+        .      .      .     .
+  |54321054|32105432|10543210|
+  |76543210|76543210|76543210|
+
+*/
+
+int base64dec(void* dest, char* b64str, uint8_t strict){ /* decode NUL-terminated base64 text into dest (caller sizes it, see base64_binlength); 0 = ok, -1 = malformed */
+	uint8_t buffer[4];
+	uint8_t idx=0;
+	uint8_t term=0;
+	for(;;){
+		/* translate the next character; 0xFF means "not in the base64 alphabet" */
+		/* (this covers '=', the terminating NUL, line breaks and garbage alike) */
+		buffer[idx]= ascii2bit6(*b64str);
+		/* idx  = number of 6-bit values collected for the current group */
+		/* term = number of '=' padding characters seen so far           */
+		
+		if(buffer[idx]==0xFF){
+			if(*b64str=='='){
+				term++;
+				b64str++;
+				if(term==2)
+					goto finalize; /* definitely the end */
+			}else{
+				if(*b64str == '\0'){
+					goto finalize; /* definitely the end */
+				}else{
+					if(*b64str == '\r' || *b64str == '\n' || !(strict)){
+						b64str++; /* characters that we simply ignore */
+					}else{
+						return -1;
+					}
+				}
+			}
+		}else{
+			if(term)
+				return -1; /* this happens if we get a '=' in the stream */
+			idx++;
+			b64str++;
+		}
+		if(idx==4){ /* a full group: 4 x 6 bit -> 3 bytes */
+			((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+			((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;
+			((uint8_t*)dest)[2] = buffer[2]<<6 | buffer[3];
+			dest = (uint8_t*)dest +3;
+			idx=0;
+		}
+	}
+  finalize:	
+	/* the final touch */
+	if(idx==0)
+		return 0;
+	if(term==1 && idx==3){ /* "xxx=": three values carry two bytes */
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		((uint8_t*)dest)[1] = buffer[1]<<4 | buffer[2]>>2;			
+		return 0;
+	}
+	if(term==2 && idx==2){ /* "xx==": two values carry one byte */
+		((uint8_t*)dest)[0] = buffer[0]<<2 | buffer[1]>>4;
+		return 0;
+	}
+	return -1; /* incomplete group, or padding that does not match the group size */
+}
diff --git a/hmac-md5/base64_dec.h b/hmac-md5/base64_dec.h
new file mode 100644
index 0000000..39beff8
--- /dev/null
+++ b/hmac-md5/base64_dec.h
@@ -0,0 +1,29 @@
+/* base64_dec.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_DEC_H_
+#define BASE64_DEC_H_
+
+#include <stdint.h>
+
+int base64_binlength(char* str, uint8_t strict);
+int base64dec(void* dest, char* b64str, uint8_t strict);
+
+#endif /*BASE64_DEC_H_*/
diff --git a/hmac-md5/base64_enc.c b/hmac-md5/base64_enc.c
new file mode 100644
index 0000000..400f25c
--- /dev/null
+++ b/hmac-md5/base64_enc.c
@@ -0,0 +1,117 @@
+/* base64_enc.c */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * base64 encoder (RFC3548)
+ * Author: Daniel Otte
+ * License: GPLv3
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include "base64_enc.h"
+
+#if 1
+#include <avr/pgmspace.h>
+
+const char base64_alphabet[64] PROGMEM = { /* 6-bit value -> character; const is required for flash-resident (PROGMEM) data */
+	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
+	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 
+	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 
+	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 
+	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
+	'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 
+	'w', 'x', 'y', 'z', '0', '1', '2', '3', 
+	'4', '5', '6', '7', '8', '9', '+', '/' }; 
+
+static 
+char bit6toAscii(uint8_t a){
+	a &= (uint8_t)0x3F;
+	return pgm_read_byte(base64_alphabet+a);
+}
+
+#else
+
+static 
+char bit6toAscii(uint8_t a){
+	a &= (uint8_t)0x3F;
+	
+	if(a<=25){
+		return a+'A';
+	} else {
+		if(a<=51){
+			return a-26+'a';
+		} else {
+			if(a<=61){
+				return a-52+'0';
+			} else {
+				if(a==62){
+					return '+';
+				} else {
+					return '/'; /* a == 63 */
+				}
+			}
+		}
+	}
+}
+
+#endif
+
+void base64enc(char* dest, void* src, uint16_t length){
+	uint16_t i,j;
+	uint8_t a[4];
+	for(i=0; i<length/3; ++i){
+		a[0]= (((uint8_t*)src)[i*3+0])>>2;
+		a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+		a[2]= (((((uint8_t*)src)[i*3+1])<<2) | ((((uint8_t*)src)[i*3+2])>>6)) & 0x3F;
+		a[3]= (((uint8_t*)src)[i*3+2]) & 0x3F;
+		for(j=0; j<4; ++j){
+			*dest++=bit6toAscii(a[j]);
+		}
+	}
+	/* now we do the rest */
+	switch(length%3){
+		case 0: 
+			break;
+		case 1:
+			a[0]=(((uint8_t*)src)[i*3+0])>>2;
+			a[1]=((((uint8_t*)src)[i*3+0])<<4)&0x3F;
+			*dest++ = bit6toAscii(a[0]);
+			*dest++ = bit6toAscii(a[1]);
+			*dest++ = '=';
+			*dest++ = '=';
+			break;
+		case 2:		
+			a[0]= (((uint8_t*)src)[i*3+0])>>2;
+			a[1]= (((((uint8_t*)src)[i*3+0])<<4) | ((((uint8_t*)src)[i*3+1])>>4)) & 0x3F;
+			a[2]= ((((uint8_t*)src)[i*3+1])<<2) & 0x3F;
+			*dest++ = bit6toAscii(a[0]);
+			*dest++ = bit6toAscii(a[1]);
+			*dest++ = bit6toAscii(a[2]);
+			*dest++ = '=';
+			break;
+		default: /* this will not happen! */
+			break;	
+	}
+/*  finalize: */
+  	*dest='\0';
+}
+
diff --git a/hmac-md5/base64_enc.h b/hmac-md5/base64_enc.h
new file mode 100644
index 0000000..9065132
--- /dev/null
+++ b/hmac-md5/base64_enc.h
@@ -0,0 +1,28 @@
+/* base64_enc.h */
+/*
+ *   This file is part of the AVR-Crypto-Lib.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef BASE64_ENC_H_
+#define BASE64_ENC_H_
+
+#include <stdint.h>
+
+void base64enc(char* dest, void* src, uint16_t length);
+
+#endif /*BASE64_ENC_H_*/
diff --git a/hmac-md5/hmac-md5.c b/hmac-md5/hmac-md5.c
new file mode 100644
index 0000000..d72dca9
--- /dev/null
+++ b/hmac-md5/hmac-md5.c
@@ -0,0 +1,130 @@
+/* hmac-md5.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * 
+ * implementation of HMAC as described in RFC2104
+ * Author:      Daniel Otte
+ * email:       daniel.otte@rub.de
+ * License:     GPLv3 or later
+ **/
+
+/* 
+ * hmac = hash ( k^opad , hash( k^ipad  , msg))
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "config.h"
+#include "md5.h"
+#include "hmac-md5.h"
+
+#define IPAD 0x36
+#define OPAD 0x5C
+
+#ifndef HMAC_SHORTONLY
+
+void hmac_md5_init(hmac_md5_ctx_t *s, void* key, uint16_t keylength_b){ /* keylength_b is given in bits */
+	uint8_t buffer[MD5_BLOCK_BYTES];
+	uint8_t i;
+	/* build the key block: zero padded key, or its MD5 hash if it exceeds one block */
+	memset(buffer, 0, MD5_BLOCK_BYTES);
+	if (keylength_b > MD5_BLOCK_BITS){
+		md5((void*)buffer, key, keylength_b);
+	} else {
+		memcpy(buffer, key, (keylength_b+7)/8);
+	}
+	/* inner context s->a starts with (key ^ ipad) */
+	for (i=0; i<MD5_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD;
+	}
+	md5_init(&(s->a));
+	md5_nextBlock(&(s->a), buffer);
+	/* buffer holds key^ipad, so xoring with ipad^opad yields key^opad for the outer context s->b */
+	for (i=0; i<MD5_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD^OPAD;
+	}
+	md5_init(&(s->b));
+	md5_nextBlock(&(s->b), buffer);
+	/* optionally remove the key material from the stack */
+#if defined SECURE_WIPE_BUFFER
+	memset(buffer, 0, MD5_BLOCK_BYTES);
+#endif
+}
+
+void hmac_md5_nextBlock(hmac_md5_ctx_t *s, const void* block){
+	md5_nextBlock(&(s->a), block);
+}
+
+void hmac_md5_lastBlock(hmac_md5_ctx_t *s, const void* block, uint16_t length_b){
+	md5_lastBlock(&(s->a), block, length_b);
+}
+
+void hmac_md5_final(void* dest, hmac_md5_ctx_t *s){ /* writes the 16 byte HMAC to dest; presumably called after hmac_md5_lastBlock */
+	md5_ctx2hash((md5_hash_t*)dest, &(s->a)); /* inner hash, parked in dest */
+	md5_lastBlock(&(s->b), dest, MD5_HASH_BITS); /* outer hash absorbs the inner hash as its final block */
+	md5_ctx2hash((md5_hash_t*)dest, &(s->b)); /* dest now holds the outer hash */
+}
+
+#endif
+
+/*
+void hmac_md5_nextBlock()
+void hmac_md5_lastBlock()
+*/
+
+/*
+ * keylength in bits!
+ * message length in bits!
+ */
+void hmac_md5(void* dest, void* key, uint16_t keylength_b, void* msg, uint32_t msglength_b){ /* a one-shot*/
+	md5_ctx_t s;
+	uint8_t i;
+	uint8_t buffer[MD5_BLOCK_BYTES];
+	/* buffer becomes the zero padded key block */
+	memset(buffer, 0, MD5_BLOCK_BYTES);
+	
+	/* if key is larger than a block we have to hash it*/
+	if (keylength_b > MD5_BLOCK_BITS){
+		md5((void*)buffer, key, keylength_b);
+	} else {
+		memcpy(buffer, key, (keylength_b+7)/8);
+	}
+	/* inner hash: MD5((key ^ ipad) || msg) */
+	for (i=0; i<MD5_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD;
+	}
+	md5_init(&s);
+	md5_nextBlock(&s, buffer);
+	while (msglength_b >= MD5_BLOCK_BITS){ /* all complete message blocks */
+		md5_nextBlock(&s, msg);
+		msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
+		msglength_b -=  MD5_BLOCK_BITS;
+	}
+	md5_lastBlock(&s, msg, msglength_b); /* remaining bits (fewer than one block) */
+	/* since buffer still contains key xor ipad we can do ... */
+	for (i=0; i<MD5_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD ^ OPAD;
+	}
+	md5_ctx2hash(dest, &s); /* save the inner hash temporarily in dest */
+	md5_init(&s); /* outer hash: MD5((key ^ opad) || inner hash) */
+	md5_nextBlock(&s, buffer);
+	md5_lastBlock(&s, dest, MD5_HASH_BITS);
+	md5_ctx2hash(dest, &s);
+}
+
diff --git a/hmac-md5/hmac-md5.h b/hmac-md5/hmac-md5.h
new file mode 100644
index 0000000..5bbaeb9
--- /dev/null
+++ b/hmac-md5/hmac-md5.h
@@ -0,0 +1,42 @@
+/* hmac-md5.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2009  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HMACMD5_H_
+#define HMACMD5_H_
+
+#include "md5.h"
+
+#define HMAC_MD5_BITS        MD5_HASH_BITS
+#define HMAC_MD5_BYTES       MD5_HASH_BYTES
+#define HMAC_MD5_BLOCK_BITS  MD5_BLOCK_BITS
+#define HMAC_MD5_BLOCK_BYTES MD5_BLOCK_BYTES
+
+typedef struct{
+	md5_ctx_t a,b;
+} hmac_md5_ctx_t;
+
+
+void hmac_md5_init(hmac_md5_ctx_t *s, void* key, uint16_t keylength_b);
+void hmac_md5_nextBlock(hmac_md5_ctx_t *s, const void* block);
+void hmac_md5_lastBlock(hmac_md5_ctx_t *s, const void* block, uint16_t length_b);
+void hmac_md5_final(void* dest, hmac_md5_ctx_t *s);
+
+void hmac_md5(void* dest, void* key, uint16_t keylength_b, void* msg, uint32_t msglength_b);
+
+
+#endif /*HMACMD5_H_*/
diff --git a/hmac-md5/md5-asm.S b/hmac-md5/md5-asm.S
new file mode 100644
index 0000000..de3b170
--- /dev/null
+++ b/hmac-md5/md5-asm.S
@@ -0,0 +1,977 @@
+/* md5-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:  Daniel Otte
+ * License: GPLv3 or later
+ * Date:    2008-11-15
+*/
+
+
+#include "avr-asm-macros.S"
+
+;###########################################################	
+; T table: the 64 MD5 additive constants, each stored as two 16-bit words (low word first)
+
+T_table:
+.hword	0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c 
+.hword	0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44 
+.hword	0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679 
+.hword	0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6 
+.hword	0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1 
+.hword	0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef 
+.hword	0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d 
+.hword	0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf 
+.hword	0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4 
+.hword	0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a 
+.hword	0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef 
+.hword	0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08 
+.hword	0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
+
+
+#define MD5_init_fast
+
+.global md5_init 
+#ifndef MD5_init_fast
+;###########################################################	
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifies: Z(r30,r31), X(r26,r27), r24, r0
+; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
+md5_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8(md5_init_vector)
+	ldi r31, hi8(md5_init_vector)
+	ldi r24, 16+4
+md5_init_vloop:	
+	lpm r0, Z+ 
+	st X+, r0
+	dec r24
+	brne md5_init_vloop
+	ret
+	
+md5_init_vector:
+.hword 0x2301, 0x6745
+.hword 0xAB89, 0xEFCD 
+.hword 0xDCFE, 0x98BA 
+.hword 0x5476, 0x1032 
+.hword 0x0000, 0x0000
+
+#else
+;###########################################################	
+.global md5_init_fast 
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifies: r24, X(r26,r27)
+; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
+; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
+md5_init:
+md5_init_fast:
+	movw r26, r24
+	ldi r24, 0x01
+	st X+, r24
+	ldi r24, 0x23
+	st X+, r24
+	ldi r24, 0x45
+	st X+, r24
+	ldi r24, 0x67
+	st X+, r24
+	ldi r24, 0x89
+	st X+, r24
+	ldi r24, 0xAB
+	st X+, r24
+	ldi r24, 0xCD
+	st X+, r24
+	ldi r24, 0xEF
+	st X+, r24
+	ldi r24, 0xFE
+	st X+, r24
+	ldi r24, 0xDC
+	st X+, r24
+	ldi r24, 0xBA
+	st X+, r24
+	ldi r24, 0x98
+	st X+, r24
+	ldi r24, 0x76
+	st X+, r24
+	ldi r24, 0x54
+	st X+, r24
+	ldi r24, 0x32
+	st X+, r24
+	ldi r24, 0x10
+	st X+, r24
+	st X+, r1
+	st X+, r1
+	st X+, r1
+	st X+, r1
+	ret
+#endif
+;###########################################################	
+
+/*
+static 
+uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
+	return ((x&y)|((~x)&z));
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_F:
+	and r18, r22
+	and r19, r23
+	and r20, r24
+	and r21, r25
+	com r22
+	com r23
+	com r24
+	com r25
+	and r22, r14
+	and r23, r15
+	and r24, r16
+	and r25, r17
+	or  r22, r18
+	or  r23, r19
+	or  r24, r20
+	or  r25, r21
+	rjmp md5_core_F_exit
+	
+/*
+static
+uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
+	return ((x&z)|((~z)&y));
+}
+*/
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_G:
+	and r22, r14
+	and r23, r15
+	and r24, r16
+	and r25, r17
+	com r14
+	com r15
+	com r16
+	com r17
+	and r18, r14
+	and r19, r15
+	and r20, r16
+	and r21, r17
+	or  r22, r18
+	or  r23, r19
+	or  r24, r20
+	or  r25, r21
+	rjmp md5_core_F_exit
+/*
+static
+uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
+	return (x^y^z);
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_H:
+	eor r22, r18
+	eor r22, r14
+	eor r23, r19
+	eor r23, r15
+	eor r24, r20
+	eor r24, r16
+	eor r25, r21
+	eor r25, r17
+	rjmp md5_core_F_exit
+/*
+static
+uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
+	return (y ^ (x | (~z)));
+}
+*/
+
+jump_table:
+	rjmp md5_F
+	rjmp md5_G
+	rjmp md5_H
+;	rjmp md5_I
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_I:
+	com r14
+	com r15
+	com r16
+	com r17
+	or  r22, r14
+	or  r23, r15
+	or  r24, r16
+	or  r25, r17
+	eor r22, r18
+	eor r23, r19
+	eor r24, r20
+	eor r25, r21
+	rjmp md5_core_F_exit
+
+as_table:
+;     (as+0)&3  (as+3)&3  (as+1)&3  (as+2)&3
+;                  Z         X         Y
+;     AS_SAVE0  AS_SAVE1  AS_SAVE2  AS_SAVE3 
+.byte   1*4,      0*4,      2*4,      3*4    ;as=1
+.byte   2*4,      1*4,      3*4,      0*4    ;as=2
+.byte   3*4,      2*4,      0*4,      1*4    ;as=3
+.byte   0*4,      3*4,      1*4,      2*4    ;as=4
+
+;###########################################################	
+.global md5_core
+md5_core:
+	mov r21, r20
+	mov r20, r18
+	mov r19, r16
+	mov r18, r14
+;	rjmp md5_core_asm
+/*
+void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
+	uint32_t t;
+	md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
+	as &= 0x3;
+	/ * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
+	t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
+	a[as]=a[(as+1)&3] + ROTL32(t, s);
+}
+*/
+; a:     r24-r25
+; block: r22-r23
+; as:    r21
+; s:     r20
+; i:     r19
+; fi:    r18
+P_A0 = 24
+P_A1 = 25
+P_B0 = 22
+P_B1 = 23
+P_AS = 21
+P_S  = 20
+P_I  = 19
+P_FI = 18
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+
+
+AS_SAVE0  =  4
+AS_SAVE1  =  5
+AS_SAVE2  =  6
+AS_SAVE3  =  7
+FI_SAVE   =  8
+S_SAVE    =  9
+ACCU0     = 10
+ACCU1     = 11
+ACCU2     = 12
+ACCU3     = 13
+ARG_X0    = 22
+ARG_X1    = 23
+ARG_X2    = 24
+ARG_X3    = 25
+ARG_Y0    = 18
+ARG_Y1    = 19
+ARG_Y2    = 20
+ARG_Y3    = 21
+ARG_Z0    = 14
+ARG_Z1    = 15
+ARG_Z2    = 16
+ARG_Z3    = 17
+
+
+md5_core_asm:
+	push r16
+	push r17
+	push_range 4, 8
+	ldi r30, lo8(T_table)
+	ldi r31, hi8(T_table)
+	lsl P_I
+	rol r1
+	lsl P_I
+	rol r1
+	add r30, P_I
+	adc r31, r1
+	clr r1
+	mov FI_SAVE, r18
+	/* loading T[i] into ACCU */	
+	lpm ACCU0, Z+	
+	lpm ACCU1, Z+	
+	lpm ACCU2, Z+	
+	lpm ACCU3, Z
+	/* add *block to ACCU */
+	movw r30, P_B0
+	ld r0, Z+
+	add ACCU0, r0
+	ld r0, Z+
+	adc ACCU1, r0
+	ld r0, Z+
+	adc ACCU2, r0
+	ld r0, Z+
+	adc ACCU3, r0
+	/* add a[as+0&3] to ACCU */
+	ldi r30, lo8(as_table)
+	ldi r31, hi8(as_table)
+	dec P_AS
+	andi P_AS, 0x03
+	lsl P_AS
+	lsl P_AS
+	add r30, r21
+	adc r31, r1       ; Z points to the correct row in as_table
+	lpm AS_SAVE0, Z+
+	lpm AS_SAVE1, Z+
+	lpm AS_SAVE2, Z+
+	lpm AS_SAVE3, Z
+	movw r26, r24     ; X points to a[0]
+	add r26, AS_SAVE0
+	adc r27, r1       ; X points at a[as&3]
+	ld r0, X+
+	add ACCU0, r0
+	ld r0, X+
+	adc ACCU1, r0
+	ld r0, X+
+	adc ACCU2, r0
+	ld r0, X+
+	adc ACCU3, r0
+	mov S_SAVE, r20
+
+	movw r28, r24
+	/* loading z value */
+	movw r26, r28
+	add r26, AS_SAVE1
+	adc r27, r1
+	ld ARG_Z0, X+
+	ld ARG_Z1, X+
+	ld ARG_Z2, X+
+	ld ARG_Z3, X
+
+	/* loading x value */
+	movw r26, r28	
+	add r26, AS_SAVE2
+	adc r27, r1
+	ld ARG_X0, X+
+	ld ARG_X1, X+
+	ld ARG_X2, X+
+	ld ARG_X3, X
+
+	/* loading y value */
+	movw r26, r28
+	add r26, AS_SAVE3
+	adc r27, r1
+	ldi r30, pm_lo8(jump_table)
+	ldi r31, pm_hi8(jump_table)
+	add r30, FI_SAVE
+	adc r31, r1    ; Z points to the correct entry in our jump table
+	ld ARG_Y0, X+
+	ld ARG_Y1, X+
+	ld ARG_Y2, X+
+	ld ARG_Y3, X
+
+	ijmp /* calls the function pointed by Z */
+md5_core_F_exit:		
+
+	/* add ACCU to result of f() */
+	add r22, ACCU0
+	adc r23, ACCU1
+	adc r24, ACCU2
+	adc r25, ACCU3
+
+	/* rotate */
+	mov r20, S_SAVE
+rotl32:
+	cpi r20, 8
+	brlo bitrotl
+	mov r21, r25
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl:
+	mov r21, r25
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+bitrotl_loop2:	
+	lsl r21
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	dec r20
+	brne bitrotl_loop2
+fixrotl:
+
+	/* add a[(as+1)&3]  */
+	movw r26, r28
+	add r26, AS_SAVE2
+	adc r27, r1
+	ld r0, X+
+	add r22, r0
+	ld r0, X+
+	adc r23, r0
+	ld r0, X+
+	adc r24, r0
+	ld r0, X
+	adc r25, r0
+
+	/* store result */
+	movw r26, r28
+	add r26, AS_SAVE0
+	adc r27, r1
+	st X+, r22
+	st X+, r23
+	st X+, r24
+	st X , r25	
+md5_core_exit:
+	pop_range 4, 8
+	pop r17
+	pop r16
+	ret
+
+;###################################################################
+/*
+void md5_nextBlock(md5_ctx_t *state, void* block){
+	uint32_t	a[4];
+	uint8_t		m,n,i=0;
+
+	a[0]=state->a[0];
+	a[1]=state->a[1];
+	a[2]=state->a[2];
+	a[3]=state->a[3];
+	
+	/ * round 1 * /
+	uint8_t s1t[]={7,12,17,22}; // 1,-1   1,4   2,-1   3,-2
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
+		}
+	}
+	/ * round 2 * /
+	uint8_t s2t[]={5,9,14,20}; // 1,-3   1,1   2,-2   2,4
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
+		}
+	}
+	/ * round 3 * /
+	uint8_t s3t[]={4,11,16,23}; // 0,4   1,3   2,0   3,-1
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
+		}
+	}
+	/ * round 4 * /
+	uint8_t s4t[]={6,10,15,21}; // 1,-2   1,2   2,-1   3,-3
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
+		}
+	}
+	state->a[0] += a[0];
+	state->a[1] += a[1];
+	state->a[2] += a[2];
+	state->a[3] += a[3];
+	state->counter++;
+}
+*/
+
+shift_table_1:  .byte  7,12,17,22
+shift_table_2:  .byte  5, 9,14,20
+shift_table_3:  .byte  4,11,16,23
+shift_table_4:  .byte  6,10,15,21
+
+index_table_r2:
+;(1+m*4+n*5)&0xf:
+        .byte 0x04, 0x18, 0x2c, 0x00 
+        .byte 0x14, 0x28, 0x3c, 0x10 
+        .byte 0x24, 0x38, 0x0c, 0x20 
+        .byte 0x34, 0x08, 0x1c, 0x30 
+
+index_table_r3:
+;(5-m*4+n*3)&0xf:
+        .byte 0x14, 0x20, 0x2c, 0x38 
+        .byte 0x04, 0x10, 0x1c, 0x28 
+        .byte 0x34, 0x00, 0x0c, 0x18 
+        .byte 0x24, 0x30, 0x3c, 0x08 
+
+index_table_r4:
+;(0-m*4+n*7)&0xf:
+        .byte 0x00, 0x1c, 0x38, 0x14 
+        .byte 0x30, 0x0c, 0x28, 0x04 
+        .byte 0x20, 0x3c, 0x18, 0x34 
+        .byte 0x10, 0x2c, 0x08, 0x24
+
+APTR_REG = 2
+BPTR_REG = 4
+N_REG = 6
+M_REG = 7
+I_REG = 8
+.global md5_nextBlock
+md5_nextBlock:
+	stack_alloc 16
+	push_range 2, 17
+	push r28
+	push r29
+	push r24
+	push r25
+	adiw r30, 1 /* Z now points to the beginning of the allocated memory */
+	movw r2, r30
+	movw r4, r22
+	movw r26, r24
+	ldi r20, 16
+1:
+	ld r0, X+
+	st Z+, r0
+	dec r20
+	brne 1b
+	/* state now copied to stack memory */
+	clr I_REG	
+	/* Round 1 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	lsl r0
+	lsl r0
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_1)
+	ldi r31, hi8(shift_table_1)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 0
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+	
+	/* Round 2 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r2)
+	ldi r31, hi8(index_table_r2)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_2)
+	ldi r31, hi8(shift_table_2)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 1
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 3 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r3)
+	ldi r31, hi8(index_table_r3)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_3)
+	ldi r31, hi8(shift_table_3)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 2
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 4 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r4)
+	ldi r31, hi8(index_table_r4)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_4)
+	ldi r31, hi8(shift_table_4)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 3
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+
+	pop r27
+	pop r26 /* X now points to the context */
+	movw r30, APTR_REG
+	ldi r16, 4
+1:
+	ld r0, X
+	ld r2, Z+
+	add r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	dec r16
+	brne 1b
+
+	ld r0, X
+	inc r0
+	st X+, r0	
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0	
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0	
+	brne 2f	
+	ld r0, X
+	inc r0
+	st X+, r0	
+2:			
+
+	pop r29
+	pop r28
+	pop_range 2, 17
+	stack_free 16
+	ret
+
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+	uint16_t l;
+	uint8_t b[64];
+	while (length_b >= 512){
+		md5_nextBlock(state, block);
+		length_b -= 512;
+		block = ((uint8_t*)block) + 512/8;
+	}
+	memset(b, 0, 64);
+	memcpy(b, block, length_b/8);
+	/ * insert padding one * /
+	l=length_b/8;
+	if(length_b%8){
+		uint8_t t;
+		t = ((uint8_t*)block)[l];
+		t |= (0x80>>(length_b%8));
+		b[l]=t;
+	}else{
+		b[l]=0x80;
+	}
+	/ * insert length value * /
+	if(l+sizeof(uint64_t) >= 512/8){
+		md5_nextBlock(state, b);
+		state->counter--;
+		memset(b, 0, 64-8);
+	}
+	*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+	md5_nextBlock(state, b);
+}
+*/
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b  : r20,r21
+.global md5_lastBlock
+md5_lastBlock: /* pads the final (partial) block, appends the bit count and hashes it */
+	stack_alloc_large 64
+	push_range 12, 17
+	push r30
+	push r31
+	movw r16, r20 /* length_b  */ 
+	movw r14, r22 /* block_ptr */
+	movw r12, r24 /* state_ptr */
+	/* first hand every complete 512-bit block to md5_nextBlock */
+2:
+	cpi r17, 2 /* hi8(512) */	
+	brlo 2f
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock
+	ldi r18, 64 /* must be loaded after the call: md5_nextBlock clobbers r18 */
+	add r14, r18
+	adc r15, r1
+	subi r17, 2
+	rjmp 2b
+2:
+	pop r31
+	pop r30
+
+	adiw r30, 1 /* adjust Z to point to buffer */
+	movw r26, r14
+	movw r24, r16
+	adiw r24, 7
+
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	lsr r24 /* r24 now holds how many bytes are to copy */
+    ldi r18, 64
+	sub r18, r24 /* r18 will hold the amount of used bytes in buffer */
+	tst r24
+4:
+	breq 5f
+	ld r0, X+
+	st Z+, r0 
+	dec r24
+	rjmp 4b /* Z points to the byte after msg in buffer */
+5:	/* append 1-bit */
+	mov r20, r16
+	ldi r19, 0x80
+	andi r20, 0x07
+	brne bit_fucking
+	st Z+, r19
+	dec r18 /* 'allocate' another byte in buffer */
+	rjmp after_bit_fucking
+bit_fucking:
+1:
+	lsr r19
+	dec r20
+	brne 1b
+	or r0, r19
+	st -Z, r0
+    adiw r30, 1
+after_bit_fucking:
+	clt	
+	cpi r18, 8
+	brmi 2f
+	set         /* store in t if the counter will also fit in this block (1 if fit)*/
+2:
+	tst r18
+	breq 2f
+1: /* fill remaining buffer with zeros */
+	st Z+, r1
+	dec r18
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30,  1
+	movw r14, r30 /* r14:r15 now points to buffer */	
+	brts load_counter
+	/* counter does not fit, finalize this block */
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock
+	movw r30, r14
+	ldi r20, 64-8
+3:
+	st Z+, r1
+	dec r20
+	brne 3b
+	
+load_counter:		
+	movw r26, r12 /* X points to state */
+	adiw r26, 16
+	ld r19, X+
+	ld r20, X+
+	ld r21, X+
+	ld r22, X+
+	brts post_counter_decrement	/* do not decrement because counter fits */
+counter_decrement:
+	subi r19, 1
+	sbci r20, 0
+	sbci r21, 0
+	sbci r22, 0
+post_counter_decrement:
+	clr r18
+	clr r23
+	lsl r19
+	rol r20
+	rol r21
+	rol r22
+	rol r23
+	mov r18, r16 /* r16:r17 length_b */
+	add r19, r17
+	adc r20, r1
+	adc r21, r1
+	adc r22, r1
+	adc r23, r1
+	movw r30, r14
+	adiw r30, 64-8
+	st Z+, r18
+	st Z+, r19
+	st Z+, r20
+	st Z+, r21
+	st Z+, r22
+	st Z+, r23
+	st Z+, r1
+	st Z, r1
+
+	sbiw r30, 63
+;	sbiw r30, 1
+	movw r24, r12
+	movw r22, r30
+	rcall md5_nextBlock
+md5_lastBlock_exit:	
+	pop_range 12, 17
+	stack_free_large 64
+	ret
+
+
+;###############################################################################
+
+
+.global md5_ctx2hash
+md5_ctx2hash: /* copies 16 bytes from the address in r22:r23 to the address in r24:r25 */
+	movw r26, r24 /* X = destination (first argument) */
+	movw r30, r22 /* Z = source (second argument) */
+	ldi r22, 16 /* byte counter; the source pointer already lives in Z */
+1:
+	ld r0, Z+
+	st X+, r0
+	dec r22
+	brne 1b	
+	ret
+
+
+;###############################################################################
+
+
+.global md5
+md5:
+	stack_alloc 20
+	push_range  8, 17
+	adiw r30, 1
+	movw  r8, r30 /* ctx           */
+	movw r10, r24 /* dest          */
+	movw r12, r22 /* msg           */
+	movw r14, r18 /* length (low)  */
+	movw r16, r20 /* length (high) */
+	movw r24, r30
+	rcall md5_init
+1:
+	tst r16
+	brne next_round
+	tst r17
+	breq last_round
+next_round:
+	movw r24,  r8
+	movw r22, r12
+	rcall md5_nextBlock
+	ldi r22, 64
+	add r12, r22
+	adc r13, r1
+	ldi r22, 2
+	sub r15, r22
+	sbci r16, 0
+	sbci r17, 0
+	rjmp 1b
+last_round:		
+	movw r24, r8
+	movw r22, r12
+	movw r20, r14
+	rcall md5_lastBlock
+	movw r24, r10
+	movw r22,  r8
+	rcall md5_ctx2hash
+	pop_range  8, 17
+	stack_free 20
+	ret
+
+
+
diff --git a/hmac-md5/md5.h b/hmac-md5/md5.h
new file mode 100644
index 0000000..6b65c4a
--- /dev/null
+++ b/hmac-md5/md5.h
@@ -0,0 +1,55 @@
+/* md5.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * File:	md5.h
+ * Author:	Daniel Otte
+ * Date: 	31.07.2006
+ * License: GPL
+ * Description: Implementation of the MD5 hash algorithm as described in RFC 1321
+ * 
+ */
+
+
+#ifndef MD5_H_
+#define MD5_H_
+
+#include <stdint.h>
+
+
+#define MD5_HASH_BITS  128
+#define MD5_HASH_BYTES (MD5_HASH_BITS/8)
+#define MD5_BLOCK_BITS 512
+#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8)
+
+
+typedef struct md5_ctx_st {
+	uint32_t a[4];    /* hash state; md5_ctx2hash() copies these 16 bytes out as the digest */
+	uint32_t counter; /* block counter; md5_lastBlock() derives the message bit length from it (counter*512 + rest) */
+} md5_ctx_t;
+
+typedef uint8_t md5_hash_t[MD5_HASH_BYTES];
+
+ 
+void md5_init(md5_ctx_t *s); /* prepare a context before the first block */
+void md5_nextBlock(md5_ctx_t *state, const void* block); /* process one full block of MD5_BLOCK_BYTES bytes */
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length); /* length is given in bits */
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state); /* copy the MD5_HASH_BYTES digest bytes to dest */
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b); /* one-shot hash of a message in RAM; length_b in bits */
+
+#endif /*MD5_H_*/
diff --git a/hmac-sha1.c b/hmac-sha1.c
deleted file mode 100644
index 56d90ac..0000000
--- a/hmac-sha1.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/* hmac-sha1.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * implementation of HMAC as described in RFC2104
- * Author:      Daniel Otte
- * email:       daniel.otte@rub.de
- * License:     GPLv3 or later
- **/
-
-/* 
- * hmac = hash ( k^opad , hash( k^ipad  , msg))
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "config.h"
-#include "sha1.h"
-#include "hmac-sha1.h"
-
-#define IPAD 0x36
-#define OPAD 0x5C
-
-
-#ifndef HMAC_SHORTONLY
-
-void hmac_sha1_init(hmac_sha1_ctx_t *s, const void* key, uint16_t keylength_b){
-	uint8_t buffer[SHA1_BLOCK_BYTES];
-	uint8_t i;
-	
-	memset(buffer, 0, SHA1_BLOCK_BYTES);
-	if (keylength_b > SHA1_BLOCK_BITS){
-		sha1((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<SHA1_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	sha1_init(&(s->a));
-	sha1_nextBlock(&(s->a), buffer);
-	
-	for (i=0; i<SHA1_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD^OPAD;
-	}
-	sha1_init(&(s->b));
-	sha1_nextBlock(&(s->b), buffer);
-	
-	
-#if defined SECURE_WIPE_BUFFER
-	memset(buffer, 0, SHA1_BLOCK_BYTES);
-#endif
-}
-
-void hmac_sha1_nextBlock(hmac_sha1_ctx_t *s, const void* block){
-	sha1_nextBlock(&(s->a), block);
-}
-void hmac_sha1_lastBlock(hmac_sha1_ctx_t *s, const void* block, uint16_t length_b){
-	while(length_b>=SHA1_BLOCK_BITS){
-		sha1_nextBlock(&(s->a), block);
-		block = (uint8_t*)block + SHA1_BLOCK_BYTES;
-		length_b -= SHA1_BLOCK_BITS;
-	}
-	sha1_lastBlock(&(s->a), block, length_b);
-}
-
-void hmac_sha1_final(void* dest, hmac_sha1_ctx_t *s){
-	sha1_ctx2hash((sha1_hash_t*)dest, &(s->a));
-	sha1_lastBlock(&(s->b), dest, SHA1_HASH_BITS);
-	sha1_ctx2hash((sha1_hash_t*)dest, &(s->b));
-}
-
-#endif
-
-/*
- * keylength in bits!
- * message length in bits!
- */
-void hmac_sha1(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b){ /* a one-shot*/
-	sha1_ctx_t s;
-	uint8_t i;
-	uint8_t buffer[SHA1_BLOCK_BYTES];
-	
-	memset(buffer, 0, SHA1_BLOCK_BYTES);
-	
-	/* if key is larger than a block we have to hash it*/
-	if (keylength_b > SHA1_BLOCK_BITS){
-		sha1((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<SHA1_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	sha1_init(&s);
-	sha1_nextBlock(&s, buffer);
-	while (msglength_b >= SHA1_BLOCK_BITS){
-		sha1_nextBlock(&s, msg);
-		msg = (uint8_t*)msg + SHA1_BLOCK_BYTES;
-		msglength_b -=  SHA1_BLOCK_BITS;
-	}
-	sha1_lastBlock(&s, msg, msglength_b);
-	/* since buffer still contains key xor ipad we can do ... */
-	for (i=0; i<SHA1_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD ^ OPAD;
-	}
-	sha1_ctx2hash(dest, &s); /* save inner hash temporary to dest */
-	sha1_init(&s);
-	sha1_nextBlock(&s, buffer);
-	sha1_lastBlock(&s, dest, SHA1_HASH_BITS);
-	sha1_ctx2hash(dest, &s);
-}
-
diff --git a/hmac-sha1.h b/hmac-sha1.h
deleted file mode 100644
index d0be029..0000000
--- a/hmac-sha1.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* hmac-sha1.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef HMACSHA1_H_
-#define HMACSHA1_H_
-
-#include "sha1.h"
-
-#define HMAC_SHA1_BITS        SHA1_HASH_BITS
-#define HMAC_SHA1_BYTES       SHA1_HASH_BYTES
-#define HMAC_SHA1_BLOCK_BITS  SHA1_BLOCK_BITS
-#define HMAC_SHA1_BLOCK_BYTES SHA1_BLOCK_BYTES
-
-typedef struct{
-	 sha1_ctx_t a, b;
-} hmac_sha1_ctx_t;
-
-
-void hmac_sha1_init(hmac_sha1_ctx_t *s, const void* key, uint16_t keylength_b);
-void hmac_sha1_nextBlock(hmac_sha1_ctx_t *s, const void* block);
-void hmac_sha1_lastBlock(hmac_sha1_ctx_t *s, const void* block, uint16_t length_b);
-void hmac_sha1_final(void* dest, hmac_sha1_ctx_t *s);
-
-void hmac_sha1(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b);
-
-#endif /*HMACSHA1_H_*/
diff --git a/hmac-sha1/hmac-sha1.c b/hmac-sha1/hmac-sha1.c
new file mode 100644
index 0000000..56d90ac
--- /dev/null
+++ b/hmac-sha1/hmac-sha1.c
@@ -0,0 +1,131 @@
+/* hmac-sha1.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * 
+ * implementation of HMAC as described in RFC2104
+ * Author:      Daniel Otte
+ * email:       daniel.otte@rub.de
+ * License:     GPLv3 or later
+ **/
+
+/* 
+ * hmac = hash ( k^opad , hash( k^ipad  , msg))
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "config.h"
+#include "sha1.h"
+#include "hmac-sha1.h"
+
+#define IPAD 0x36
+#define OPAD 0x5C
+
+
+#ifndef HMAC_SHORTONLY
+
+void hmac_sha1_init(hmac_sha1_ctx_t *s, const void* key, uint16_t keylength_b){
+	/* one block holding the (hashed or zero-padded) key, XORed with a pad */
+	uint8_t kpad[SHA1_BLOCK_BYTES];
+	uint8_t n;
+	memset(kpad, 0, sizeof(kpad));
+	if (keylength_b <= SHA1_BLOCK_BITS){
+		memcpy(kpad, key, (keylength_b+7)/8); /* short key: used as it is */
+	} else {
+		sha1((void*)kpad, key, keylength_b); /* long key: replaced by its hash */
+	}
+
+	/* inner context starts with the block (key ^ ipad) */
+	for (n=SHA1_BLOCK_BYTES; n--; ){
+		kpad[n] ^= IPAD;
+	}
+	sha1_init(&(s->a));
+	sha1_nextBlock(&(s->a), kpad);
+
+	/* outer context: (key ^ ipad) ^ (ipad ^ opad) == key ^ opad */
+	for (n=SHA1_BLOCK_BYTES; n--; ){
+		kpad[n] ^= IPAD^OPAD;
+	}
+	sha1_init(&(s->b));
+	sha1_nextBlock(&(s->b), kpad);
+#if defined SECURE_WIPE_BUFFER
+	memset(kpad, 0, sizeof(kpad));
+#endif
+}
+
+void hmac_sha1_nextBlock(hmac_sha1_ctx_t *s, const void* block){
+	sha1_nextBlock(&s->a, block); /* message data only ever feeds the inner hash */
+}
+void hmac_sha1_lastBlock(hmac_sha1_ctx_t *s, const void* block, uint16_t length_b){
+	const uint8_t *p = (const uint8_t*)block; /* whole blocks first, the rest gets padded */
+	for (; length_b>=SHA1_BLOCK_BITS; length_b-=SHA1_BLOCK_BITS){
+		sha1_nextBlock(&(s->a), p);
+		p += SHA1_BLOCK_BYTES;
+	}
+	sha1_lastBlock(&(s->a), p, length_b);
+}
+
+void hmac_sha1_final(void* dest, hmac_sha1_ctx_t *s){
+	sha1_ctx2hash(dest, &s->a);	/* park the inner digest in dest */
+	sha1_lastBlock(&s->b, dest, SHA1_HASH_BITS);	/* outer hash absorbs it */
+	sha1_ctx2hash(dest, &s->b);	/* the MAC replaces the inner digest */
+}
+
+#endif
+
+/*
+ * keylength in bits!
+ * message length in bits!
+ */
+void hmac_sha1(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b){ /* a one-shot*/
+	const uint8_t *data = (const uint8_t*)msg;
+	uint8_t kpad[SHA1_BLOCK_BYTES];
+	sha1_ctx_t ctx;
+	uint8_t n;
+	/* kpad = key (or its hash, if longer than one block), zero padded */
+	memset(kpad, 0, sizeof(kpad));
+	if (keylength_b <= SHA1_BLOCK_BITS){
+		memcpy(kpad, key, (keylength_b+7)/8);
+	} else {
+		sha1((void*)kpad, key, keylength_b);
+	}
+
+	/* inner hash: sha1((key ^ ipad) || msg) */
+	for (n=SHA1_BLOCK_BYTES; n--; ){
+		kpad[n] ^= IPAD;
+	}
+	sha1_init(&ctx);
+	sha1_nextBlock(&ctx, kpad);
+	for (; msglength_b >= SHA1_BLOCK_BITS; msglength_b -= SHA1_BLOCK_BITS){
+		sha1_nextBlock(&ctx, data);
+		data += SHA1_BLOCK_BYTES;
+	}
+	sha1_lastBlock(&ctx, data, msglength_b);
+	sha1_ctx2hash(dest, &ctx); /* keep the inner hash in dest for now */
+
+	/* outer hash: sha1((key ^ opad) || inner); kpad still holds key ^ ipad */
+	for (n=SHA1_BLOCK_BYTES; n--; ){
+		kpad[n] ^= IPAD ^ OPAD;
+	}
+	sha1_init(&ctx);
+	sha1_nextBlock(&ctx, kpad);
+	sha1_lastBlock(&ctx, dest, SHA1_HASH_BITS);
+	sha1_ctx2hash(dest, &ctx);
+}
+
diff --git a/hmac-sha1/hmac-sha1.h b/hmac-sha1/hmac-sha1.h
new file mode 100644
index 0000000..d0be029
--- /dev/null
+++ b/hmac-sha1/hmac-sha1.h
@@ -0,0 +1,41 @@
+/* hmac-sha1.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HMACSHA1_H_
+#define HMACSHA1_H_
+
+#include "sha1.h"
+
+#define HMAC_SHA1_BITS        SHA1_HASH_BITS
+#define HMAC_SHA1_BYTES       SHA1_HASH_BYTES
+#define HMAC_SHA1_BLOCK_BITS  SHA1_BLOCK_BITS
+#define HMAC_SHA1_BLOCK_BYTES SHA1_BLOCK_BYTES
+
+typedef struct{
+	 sha1_ctx_t a, b; /* a: inner hash (seeded with key^ipad), b: outer hash (seeded with key^opad) */
+} hmac_sha1_ctx_t;
+
+
+void hmac_sha1_init(hmac_sha1_ctx_t *s, const void* key, uint16_t keylength_b); /* key length in bits */
+void hmac_sha1_nextBlock(hmac_sha1_ctx_t *s, const void* block); /* feed one full message block */
+void hmac_sha1_lastBlock(hmac_sha1_ctx_t *s, const void* block, uint16_t length_b); /* feed the final part; length in bits */
+void hmac_sha1_final(void* dest, hmac_sha1_ctx_t *s); /* write the MAC to dest; call after hmac_sha1_lastBlock() */
+
+void hmac_sha1(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b); /* one-shot; both lengths in bits */
+
+#endif /*HMACSHA1_H_*/
diff --git a/hmac-sha1/sha1-asm.S b/hmac-sha1/sha1-asm.S
new file mode 100644
index 0000000..f571685
--- /dev/null
+++ b/hmac-sha1/sha1-asm.S
@@ -0,0 +1,886 @@
+/* sha1-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS = 160
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 on the stack, then zero r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall
+	pop r31 /* the pops mirror the pushes of precall in reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27 /* keep X (r27:r26) across the two uart_putc calls */
+	push r26
+	ldi r25, '\r' /* send CR, then LF; uart_putc is external - presumably takes the character in r24 */
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26 /* first argument of uart_hexdump: the address held in X */
+.if \length > 16
+	ldi r22, lo8(16) /* second argument: dump 16 bytes per call */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16 /* advance X; the macro then expands itself again for the remaining bytes */
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/* disabled: the busy-wait loop below is commented out, so this macro expands to nothing
+	push r0
+	push r1
+	clr r0
+1:	clr r1
+2:	dec r1
+	brne 2b
+	dec r0
+	brne 1b
+	pop r1
+	pop r0  // */
+.endm
+
+/* X points to Block; debugging aid, currently expands to nothing */
+.macro dbg_hexdump length
+/* disabled: remove this comment pair to dump \length bytes at X
+	precall
+	hexdump \length
+	postcall
+	// */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha1_ctx structure
+;	given in r23,r22
+sha1_ctx2hash:
+	movw r26, r22 /* X = state */
+	movw r30, r24 /* Z = destination */
+	ldi r21, 5 /* five 32-bit words */
+	sbiw r26, 4 /* bias X so that the adiw below lands one past the end of word 0 */
+1:	
+	ldi r20, 4
+	adiw r26, 8 /* X = one past the end of the current word */
+2:	
+		ld r0, -X /* read the word from its last byte down to its first ... */
+		st Z+, r0 /* ... and write it out in ascending order, i.e. byte-reversed */
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 5*4+8	/* reserve room for a sha1_ctx_t (5 words + 64-bit length) */
+	sbci r17, 0
+	in r0, SREG
+	cli		/* SP must not be seen half-updated by an interrupt */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25	/* keep the destination pointer for sha1_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = allocated SP + 1 = first byte of the ctx; done as */
+	sbci r17, hi8(-1)	/* a 16-bit add because inc leaves the carry flag untouched */
+	
+	movw r8, r18		/* backup of length (low word) */
+	movw r10, r20		/* backup of length (high word) */
+	
+	movw r12, r22	/* backup of msg-ptr */
+	
+	movw r24, r16
+	rcall sha1_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f		/* length > 0xFFFF: certainly a full block left */
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f		/* less than 512 bits left: finish */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64
+	add r12, r19	/* advance the saved msg pointer by one 64-byte block */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8	/* remaining length, now below 512 bits */
+	rcall sha1_lastBlock
+	
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash	
+	
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8 	
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+;  param3: an 16-bit integer specifying length of block in bits
+;	given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+sha1_lastBlock:
+	cpi r21, 0x02
+	brlo sha1_lastBlock_prolog
+	/* length >= 512 bits: hash one full block, then look at the rest again */
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 2		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* block pointer += 64 bytes (one block) */
+	sbci r23, hi8(-64)
+	rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 is used as scratch here and is not zero for a while */
+	subi r30, lo8(64)
+	sbci r31, hi8(64) /* ??? */
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length is worth 256/8 = 32 bytes, ... */
+	bld r18, 5  /* ... so it becomes bit 5: r18 == length/8 (length in bytes) */
+	
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha1_lastBlock_post_copy
+	mov r1, r18
+sha1_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1		/* r1 ends up zero again when the loop is done */
+	brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:	
+sha1_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18
+
+/* checking stuff here */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha1_lastBlock_insert_zeros
+0:
+	/* no room left for the 64-bit length field in this block */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1	/* r1 is zero: the copy loop ran at least 56 times */
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* now we should subtract 512 from length */
+	movw r26, r24
+	adiw r26, 4*5+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18		/* dec leaves the carry untouched, so the borrow propagates */
+	brne 1b
+	
+	/* r18 is zero now (loop counter above), so no clr r18 is needed */
+	/* Z still points to the begin of the block */
+
+sha1_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha1_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 5*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total = state.length + length of this block, ... */
+	st -Z, r0	/* ... stored most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8
+	movw r22, r30
+	rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30,  1  ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte)
+
+xtmp = 0
+xNULL = 1
+W1 = 10
+W2 = 11
+T1	= 12
+T2	= 13
+T3	= 14
+T4	= 15
+LoopC = 16
+S	  = 17
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; initial, let's make some space ready for local vars
+ 			 /* replace push & pop by mem ops? */
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack /* maybe removeable? */ 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha1_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	push r18
+	push r19 /* push old SP on new stack */
+	push r24
+	push r25 /* param1 will be needed later */
+	
+	/* load a[] with state */
+	movw 28, r24 /* load pointer to state in Y */
+	adiw r26, 1 ; X++
+
+	ldi LoopC, 5*4	
+1:	ld tmp1, Y+
+	st X+, tmp1
+	dec LoopC
+	brne 1b
+
+	movw W1, r26 /* save pointer to w[0] */
+	/* load w[] with endian fixed message */
+		/* we might also use the changeendian32() function at bottom */
+	movw r30, r22 /* mv param2 (pointer to msg) to Z */	
+	ldi LoopC, 16
+1:
+	ldd tmp1, Z+3
+	st X+, tmp1
+	ldd tmp1, Z+2
+	st X+, tmp1
+	ldd tmp1, Z+1
+	st X+, tmp1
+	ld tmp1, Z
+	st X+, tmp1
+	adiw r30, 4
+	dec LoopC
+	brne 1b
+	
+	;clr LoopC /* LoopC is named t in FIPS 180-2 */	
+	clr xtmp
+sha1_nextBlock_mainloop:
+	mov S, LoopC
+	lsl S
+	lsl S
+	andi S, 0x3C /* S is a bytepointer so *4 */
+	/* load w[s] */
+	movw r26, W1
+	add r26, S /* X points at w[s] */
+	adc r27, xNULL
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	ld T4, X+
+
+	/* debugging aid: T is put on the stack so that dbg_hexdump can show it */
+	push r26
+	push r27
+	push T4
+	push T3
+	push T2
+	push T1
+	in r26, SPL
+	in r27, SPH
+	adiw r26, 1
+	dbg_hexdump 4
+	pop T1
+	pop T2
+	pop T3
+	pop T4
+	pop r27
+	pop r26
+	/**/
+
+	cpi LoopC, 16 /* rounds 0..15 use the message words as loaded */
+	brlt sha1_nextBlock_mainloop_core
+	/* update w[s] */
+	ldi tmp1, 2*4
+	rcall 1f
+	ldi tmp1, 8*4
+	rcall 1f
+	ldi tmp1, 13*4
+	rcall 1f
+	rjmp 2f
+1:		/* local subroutine: T ^= word at byte offset (S + tmp1) mod 64 of w[]; this might be "outsourced" to save the jump above */
+	add tmp1, S
+	andi tmp1, 0x3f
+	movw r26, W1
+	add r26, tmp1
+	adc r27, xNULL
+	ld tmp2, X+
+	eor T1, tmp2
+	ld tmp2, X+
+	eor T2, tmp2
+	ld tmp2, X+
+	eor T3, tmp2
+	ld tmp2, X+
+	eor T4, tmp2
+	ret
+2:	/* now we just have to do a ROTL(T) and save T back */
+	mov tmp2, T4
+	rol tmp2 /* only moves bit 7 of T4 into the carry, so the 32-bit rotate wraps around */
+	rol T1
+	rol T2
+	rol T3
+	rol T4
+	movw r26, W1
+	add r26, S
+	adc r27, xNULL
+	st X+, T1
+	st X+, T2
+	st X+, T3
+	st X+, T4
+	
+sha1_nextBlock_mainloop_core:	/* the core function; T=ROTL5(a) ....*/	
+								/* T already contains w[s] */
+	movw r26, W1
+	sbiw r26, 4*1		/* X points at a[4] aka e */
+	ld tmp1, X+ 
+	add T1, tmp1
+	ld tmp1, X+ 
+	adc T2, tmp1
+	ld tmp1, X+ 
+	adc T3, tmp1
+	ld tmp1, X+ 
+	adc T4, tmp1		/* T = w[s]+e */
+	sbiw r26, 4*5		/* X points at a[0] aka a */
+	ld F1, X+ 
+	ld F2, X+ 
+	ld F3, X+ 
+	ld F4, X+ 
+	mov tmp1, F4		/* X points at a[1] aka b */
+	ldi tmp2, 5
+1:
+	rol tmp1 /* tmp1 shadows F4 and only supplies the bit that wraps around */
+	rol F1
+	rol F2
+	rol F3
+	rol F4
+	dec tmp2
+	brne 1b
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+	
+	/* now select k_t and f_t for this round: xtmp is the index of the current 20-round group */
+	ldi r30, lo8(sha1_nextBlock_xTable)
+	ldi r31, hi8(sha1_nextBlock_xTable)
+	add r30, xtmp
+	adc r31, xNULL
+	lpm tmp1, Z /* round number at which the next group starts */
+	cp tmp1, LoopC
+	brne 1f
+	inc xtmp
+1:	ldi r30, lo8(sha1_nextBlock_KTable)
+	ldi r31, hi8(sha1_nextBlock_KTable)
+	lsl xtmp
+	lsl xtmp
+	add r30, xtmp
+	adc r31, xNULL
+	lsr xtmp
+	lsr xtmp
+	 
+	lpm tmp1, Z+
+	add T1, tmp1
+	lpm tmp1, Z+
+	adc T2, tmp1
+	lpm tmp1, Z+
+	adc T3, tmp1
+	lpm tmp1, Z+
+	adc T4, tmp1
+			/* T = ROTL(a,5) + e + kt + w[s] */
+	
+	/* Z-4 is just pointing to kt ... */
+	movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+	lsr r31 /* icall expects a word address, so the byte address is halved */
+	ror r30
+		
+	icall /* f_t for byte 0; every call also advances Y by one byte */
+	mov F1, tmp1
+	icall
+	mov F2, tmp1
+	icall
+	mov F3, tmp1
+	icall
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+				 /* X points still at a[1] aka b, Y points at a[2] aka c */	
+	/* update a[] */
+sha1_nextBlock_update_a:
+	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+	//adiw r28, 3*4  /* Y should point at a[4] aka e */
+	movw r28, W1
+	sbiw r28, 4
+	
+	ldi tmp2, 4*4 
+1:	
+	ld tmp1, -Y
+	std Y+4, tmp1
+	dec tmp2
+	brne 1b
+	/* Y points at a[0] aka a*/
+	
+	movw r28, W1
+	sbiw r28, 5*4
+	/* store T in a[0] aka a */
+	st Y+, T1
+	st Y+, T2
+	st Y+, T3
+	st Y+, T4
+	/* Y points at a[1] aka b*/
+	
+	/* rotate c */
+	ldd T1, Y+1*4
+	ldd T2, Y+1*4+1
+	ldd T3, Y+1*4+2
+	ldd T4, Y+1*4+3
+	mov tmp1, T1
+	ldi tmp2, 2 /* two rotations to the right == ROTL by 30 */
+1:	ror tmp1
+	ror T4
+	ror T3
+	ror T2
+	ror T1
+	dec tmp2
+	brne 1b
+	std Y+1*4+0, T1
+	std Y+1*4+1, T2
+	std Y+1*4+2, T3
+	std Y+1*4+3, T4
+	
+	push r27
+	push r26
+	movw r26, W1
+	sbiw r26, 4*5
+	dbg_hexdump 4*5
+	pop r26
+	pop r27
+	
+	inc LoopC
+	cpi LoopC, 80
+	brge 1f
+	rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:	
+   /* little patch: move Y from a[1] back to a[0] */
+	sbiw r28, 4
+
+/* add a[] to state and inc length */	
+	pop r27
+	pop r26		/* now X points to state (and Y still at a[0]) */
+	ldi tmp4, 5
+1:	clc
+	ldi tmp3, 4
+2:	ld tmp1, X
+	ld tmp2, Y+
+	adc tmp1, tmp2
+	st X+, tmp1
+	dec tmp3
+	brne 2b
+	dec tmp4
+	brne 1b
+	
+	/* now length += 512 */
+	adiw r26, 1 /* we skip the least significant byte */
+	ld tmp1, X
+	ldi tmp2, hi8(512) /* 2 */
+	add tmp1, tmp2
+	st X+, tmp1
+	ldi tmp2, 6
+1:
+	ld tmp1, X
+	adc tmp1, xNULL
+	st X+, tmp1
+	dec tmp2
+	brne 1b
+	
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+	pop r21
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	ret
+
+sha1_nextBlock_xTable:
+.byte 20,40,60,0 /* round numbers at which sha1_nextBlock steps to the next k_t/f_t entry */
+sha1_nextBlock_KTable:
+.int	0x5a827999 
+.int	0x6ed9eba1 
+.int	0x8f1bbcdc 
+.int	0xca62c1d6
+sha1_nextBlock_JumpTable: /* must directly follow KTable; 4 bytes per entry (rjmp + nop), same index as KTable */
+rjmp sha1_nextBlock_Ch
+	nop	
+rjmp sha1_nextBlock_Parity
+	nop
+rjmp sha1_nextBlock_Maj
+	nop
+rjmp sha1_nextBlock_Parity
+
+	 /* X and Y still point at a[1] aka b ; return value in tmp1; one byte per call: (b & c) ^ (~b & d) */
+sha1_nextBlock_Ch:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	com tmp2
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp3, Y+7	/* load from d */
+	and tmp2, tmp3
+	eor tmp1, tmp2
+	ret
+	
+sha1_nextBlock_Maj: /* one byte per call, result in tmp1: (b & c) ^ (b & d) ^ (c & d); advances Y */
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp4, Y+7	/* load from d */
+	and tmp2, tmp4
+	eor tmp1, tmp2
+	and tmp3, tmp4
+	eor tmp1, tmp3
+	ret
+
+sha1_nextBlock_Parity: /* one byte per call, result in tmp1: b ^ c ^ d; advances Y */
+	ld tmp1, Y+
+	ldd tmp2, Y+3	/* load from c */
+	eor tmp1, tmp2
+	ldd tmp2, Y+7	/* load from d */
+	eor tmp1, tmp2
+	ret
+/*	
+ch_str:			.asciz "\r\nCh"
+maj_str:		.asciz "\r\nMaj"
+parity_str:	.asciz "\r\nParity"
+*/
+;###########################################################	
+
+.global sha1_init 
+;void sha1_init(sha1_ctx_t *state){
+;	DEBUG_S("\r\nSHA1_INIT");
+;	state->h[0] = 0x67452301;
+;	state->h[1] = 0xefcdab89;
+;	state->h[2] = 0x98badcfe;
+;	state->h[3] = 0x10325476;
+;	state->h[4] = 0xc3d2e1f0;
+;	state->length = 0;
+;}
+; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifys: Z(r30,r31), Func1, r22
+sha1_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha1_init_vector))
+	ldi r31, hi8((sha1_init_vector))
+	ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:	
+	lpm r23, Z+ /* read the next byte of the init vector from program memory */
+	st X+, r23
+	dec r22
+	brne sha1_init_vloop
+	ldi r22, 8 /* the length field that follows h[] is 8 bytes */
+sha1_init_lloop:
+	st X+, r1 /* NOTE(review): relies on r1 being zero on entry - confirm for assembler callers */
+	dec r22
+	brne sha1_init_lloop
+	ret
+	
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/hmac-sha1/sha1.h b/hmac-sha1/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/hmac-sha1/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha1.h
+ * \author	Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date	2006-10-08
+ * \license GPLv3 or later
+ * \brief   SHA-1 declaration.
+ * \ingroup SHA-1
+ * 
+ */
+ 
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits 
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes 
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits 
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes 
+ */
+#define SHA1_HASH_BITS  160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ * 
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+	uint32_t h[5];   /* the five 32-bit hash state words h0..h4 */
+	uint64_t length; /* message bits processed so far; sha1_nextBlock() adds 512 per block */
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value 
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ *  \brief process one input block
+ * This function processes one input block and updates the hash context 
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits  
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context
+ */ 
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which is located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg  pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */ 
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/hmac-sha256.c b/hmac-sha256.c
deleted file mode 100644
index 6a57189..0000000
--- a/hmac-sha256.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* hmac-sha256.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * implementation of HMAC as described in RFC2104
- * Author:      Daniel Otte
- * email:       daniel.otte@rub.de
- * License:     GPLv3 or later
- **/
-
-/* 
- * hmac = hash ( k^opad , hash( k^ipad  , msg))
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "config.h"
-#include "sha256.h"
-#include "hmac-sha256.h"
-
-#define IPAD 0x36
-#define OPAD 0x5C
-
-#ifndef HMAC_SHA256_SHORTONLY
-
-void hmac_sha256_init(hmac_sha256_ctx_t *s, const void* key, uint16_t keylength_b){
-	uint8_t buffer[HMAC_SHA256_BLOCK_BYTES];
-	uint8_t i;
-	
-	memset(buffer, 0, HMAC_SHA256_BLOCK_BYTES);
-	if (keylength_b > HMAC_SHA256_BLOCK_BITS){
-		sha256((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	
-	sha256_init(&(s->a));
-	sha256_nextBlock(&(s->a), buffer);
-	
-	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD^OPAD;
-	}
-	sha256_init(&(s->b));
-	sha256_nextBlock(&(s->b), buffer);
-	
-#if defined SECURE_WIPE_BUFFER
-	memset(buffer, 0, SHA256_BLOCK_BYTES);
-#endif
-}
-
-void hmac_sha256_nextBlock(hmac_sha256_ctx_t *s, const void* block){
-	sha256_nextBlock(&(s->a), block);
-}
-
-void hmac_sha256_lastBlock(hmac_sha256_ctx_t *s, const void* block, uint16_t length_b){
-/*	while(length_b>=SHA256_BLOCK_BITS){
-		sha256_nextBlock(&(s->a), block);
-		block = (uint8_t*)block + SHA256_BLOCK_BYTES;
-		length_b -= SHA256_BLOCK_BITS;
-	}
-*/	sha256_lastBlock(&(s->a), block, length_b);
-}
-
-void hmac_sha256_final(void* dest, hmac_sha256_ctx_t *s){
-	sha256_ctx2hash((sha256_hash_t*)dest, &(s->a));
-	sha256_lastBlock(&(s->b), dest, SHA256_HASH_BITS);
-	sha256_ctx2hash((sha256_hash_t*)dest, &(s->b));			
-}
-
-#endif
-
-/*
- * keylength in bits!
- * message length in bits!
- */
-void hmac_sha256(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b){ /* a one-shot*/
-	sha256_ctx_t s;
-	uint8_t i;
-	uint8_t buffer[HMAC_SHA256_BLOCK_BYTES];
-	
-	memset(buffer, 0, HMAC_SHA256_BLOCK_BYTES);
-	
-	/* if key is larger than a block we have to hash it*/
-	if (keylength_b > SHA256_BLOCK_BITS){
-		sha256((void*)buffer, key, keylength_b);
-	} else {
-		memcpy(buffer, key, (keylength_b+7)/8);
-	}
-	
-	for (i=0; i<SHA256_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD;
-	}
-	sha256_init(&s);
-	sha256_nextBlock(&s, buffer);
-	while (msglength_b >= HMAC_SHA256_BLOCK_BITS){
-		sha256_nextBlock(&s, msg);
-		msg = (uint8_t*)msg + HMAC_SHA256_BLOCK_BYTES;
-		msglength_b -=  HMAC_SHA256_BLOCK_BITS;
-	}
-	sha256_lastBlock(&s, msg, msglength_b);
-	/* since buffer still contains key xor ipad we can do ... */
-	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
-		buffer[i] ^= IPAD ^ OPAD;
-	}
-	sha256_ctx2hash(dest, &s); /* save inner hash temporary to dest */
-	sha256_init(&s);
-	sha256_nextBlock(&s, buffer);
-	sha256_lastBlock(&s, dest, SHA256_HASH_BITS);
-	sha256_ctx2hash(dest, &s);
-}
diff --git a/hmac-sha256.h b/hmac-sha256.h
deleted file mode 100644
index 4df6af5..0000000
--- a/hmac-sha256.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* hmac-sha256.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef HMACSHA256_H_
-#define HMACSHA256_H_
-
-#include "sha256.h"
-
-#define HMAC_SHA256_BITS        SHA256_HASH_BITS
-#define HMAC_SHA256_BYTES       SHA256_HASH_BYTES
-#define HMAC_SHA256_BLOCK_BITS  SHA256_BLOCK_BITS
-#define HMAC_SHA256_BLOCK_BYTES SHA256_BLOCK_BYTES
-
-
-typedef struct {
-	sha256_ctx_t a,b;
-} hmac_sha256_ctx_t;
-
-
-void hmac_sha256_init(hmac_sha256_ctx_t *s, const void* key, uint16_t keylength_b);
-void hmac_sha256_nextBlock(hmac_sha256_ctx_t *s, const void* block);
-void hmac_sha256_lastBlock(hmac_sha256_ctx_t *s, const void* block, uint16_t length_b);
-void hmac_sha256_final(void* dest, hmac_sha256_ctx_t *s);
-
-void hmac_sha256(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b);
-
-
-#endif /*HMACSHA256_H_*/
diff --git a/hmac-sha256/hmac-sha256.c b/hmac-sha256/hmac-sha256.c
new file mode 100644
index 0000000..6a57189
--- /dev/null
+++ b/hmac-sha256/hmac-sha256.c
@@ -0,0 +1,130 @@
+/* hmac-sha256.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * 
+ * implementation of HMAC as described in RFC2104
+ * Author:      Daniel Otte
+ * email:       daniel.otte@rub.de
+ * License:     GPLv3 or later
+ **/
+
+/* 
+ * hmac = hash ( k^opad , hash( k^ipad  , msg))
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "config.h"
+#include "sha256.h"
+#include "hmac-sha256.h"
+
+#define IPAD 0x36
+#define OPAD 0x5C
+
+#ifndef HMAC_SHA256_SHORTONLY
+/* Set up a streaming HMAC: s->a (inner hash) absorbs key^ipad, s->b (outer hash) absorbs key^opad; keylength_b is in bits. */
+void hmac_sha256_init(hmac_sha256_ctx_t *s, const void* key, uint16_t keylength_b){
+	uint8_t buffer[HMAC_SHA256_BLOCK_BYTES];
+	uint8_t i;
+	/* build the zero-padded key block; a key longer than one block is replaced by its hash */
+	memset(buffer, 0, HMAC_SHA256_BLOCK_BYTES);
+	if (keylength_b > HMAC_SHA256_BLOCK_BITS){
+		sha256((void*)buffer, key, keylength_b);
+	} else {
+		memcpy(buffer, key, (keylength_b+7)/8); /* bit length rounded up to whole bytes */
+	}
+	/* buffer = key ^ ipad */
+	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD;
+	}
+	
+	sha256_init(&(s->a));
+	sha256_nextBlock(&(s->a), buffer);
+	/* buffer currently holds key^ipad, so XORing with (ipad^opad) yields key^opad */
+	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD^OPAD;
+	}
+	sha256_init(&(s->b));
+	sha256_nextBlock(&(s->b), buffer);
+	/* optionally scrub the key-derived pad from the stack */
+#if defined SECURE_WIPE_BUFFER
+	memset(buffer, 0, SHA256_BLOCK_BYTES);
+#endif
+}
+/* Feed one complete message block (HMAC_SHA256_BLOCK_BYTES bytes) into the inner hash s->a. */
+void hmac_sha256_nextBlock(hmac_sha256_ctx_t *s, const void* block){
+	sha256_nextBlock(&(s->a), block);
+}
+/* Finish the inner hash s->a with the final block; length_b is the number of valid bits in block. */
+void hmac_sha256_lastBlock(hmac_sha256_ctx_t *s, const void* block, uint16_t length_b){
+/*	while(length_b>=SHA256_BLOCK_BITS){
+		sha256_nextBlock(&(s->a), block);
+		block = (uint8_t*)block + SHA256_BLOCK_BYTES;
+		length_b -= SHA256_BLOCK_BITS;
+	}
+*/	sha256_lastBlock(&(s->a), block, length_b);
+}
+/* Produce the MAC in dest: the inner digest is staged in dest, absorbed by the outer hash s->b, then dest is overwritten with the outer digest. */
+void hmac_sha256_final(void* dest, hmac_sha256_ctx_t *s){
+	sha256_ctx2hash((sha256_hash_t*)dest, &(s->a));
+	sha256_lastBlock(&(s->b), dest, SHA256_HASH_BITS);
+	sha256_ctx2hash((sha256_hash_t*)dest, &(s->b));			
+}
+
+#endif
+
+/*
+ * One-shot HMAC-SHA256: dest receives hash(k^opad , hash(k^ipad , msg)).
+ * keylength_b and msglength_b are both given in bits!
+ */
+void hmac_sha256(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b){ /* a one-shot*/
+	sha256_ctx_t s;
+	uint8_t i;
+	uint8_t buffer[HMAC_SHA256_BLOCK_BYTES];
+	/* start from an all-zero key block */
+	memset(buffer, 0, HMAC_SHA256_BLOCK_BYTES);
+	
+	/* if key is larger than a block we have to hash it*/
+	if (keylength_b > SHA256_BLOCK_BITS){
+		sha256((void*)buffer, key, keylength_b);
+	} else {
+		memcpy(buffer, key, (keylength_b+7)/8);
+	}
+	/* inner pass: hash(k^ipad , msg) */
+	for (i=0; i<SHA256_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD;
+	}
+	sha256_init(&s);
+	sha256_nextBlock(&s, buffer);
+	while (msglength_b >= HMAC_SHA256_BLOCK_BITS){
+		sha256_nextBlock(&s, msg);
+		msg = (uint8_t*)msg + HMAC_SHA256_BLOCK_BYTES;
+		msglength_b -=  HMAC_SHA256_BLOCK_BITS;
+	}
+	sha256_lastBlock(&s, msg, msglength_b);
+	/* since buffer still contains key xor ipad, XORing with (ipad ^ opad) turns it into key xor opad */
+	for (i=0; i<HMAC_SHA256_BLOCK_BYTES; ++i){
+		buffer[i] ^= IPAD ^ OPAD;
+	}
+	sha256_ctx2hash(dest, &s); /* save inner hash temporarily to dest */
+	sha256_init(&s);
+	sha256_nextBlock(&s, buffer);
+	sha256_lastBlock(&s, dest, SHA256_HASH_BITS);
+	sha256_ctx2hash(dest, &s);
+}
diff --git a/hmac-sha256/hmac-sha256.h b/hmac-sha256/hmac-sha256.h
new file mode 100644
index 0000000..4df6af5
--- /dev/null
+++ b/hmac-sha256/hmac-sha256.h
@@ -0,0 +1,43 @@
+/* hmac-sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef HMACSHA256_H_
+#define HMACSHA256_H_
+
+#include "sha256.h"
+
+#define HMAC_SHA256_BITS        SHA256_HASH_BITS
+#define HMAC_SHA256_BYTES       SHA256_HASH_BYTES
+#define HMAC_SHA256_BLOCK_BITS  SHA256_BLOCK_BITS
+#define HMAC_SHA256_BLOCK_BYTES SHA256_BLOCK_BYTES
+
+/* HMAC state: 'a' is the inner hash (primed with key^ipad), 'b' the outer hash (primed with key^opad) */
+typedef struct {
+	sha256_ctx_t a,b;
+} hmac_sha256_ctx_t;
+
+/* streaming interface (not compiled when HMAC_SHA256_SHORTONLY is defined); all *_b lengths are in bits */
+void hmac_sha256_init(hmac_sha256_ctx_t *s, const void* key, uint16_t keylength_b);
+void hmac_sha256_nextBlock(hmac_sha256_ctx_t *s, const void* block);
+void hmac_sha256_lastBlock(hmac_sha256_ctx_t *s, const void* block, uint16_t length_b);
+void hmac_sha256_final(void* dest, hmac_sha256_ctx_t *s);
+/* one-shot interface: dest receives the HMAC_SHA256_BYTES byte MAC; both lengths are in bits */
+void hmac_sha256(void* dest, const void* key, uint16_t keylength_b, const void* msg, uint32_t msglength_b);
+
+
+#endif /*HMACSHA256_H_*/
diff --git a/hmac-sha256/sha256-asm.S b/hmac-sha256/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/hmac-sha256/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler	
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31, then zero r1 (presumably because called C code expects r1 == 0 - confirm against the avr-gcc ABI) */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall
+	pop r31	/* restores every register saved by precall, in reverse push order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+/* debug helper: sends CR and LF via uart_putc, then hands X (in r25:r24) and a byte count of at most 16 (in r23:r22) to uart_hexdump; longer lengths recurse with X advanced by 16 */
+.macro hexdump length
+	push r27
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26
+.if \length > 16
+	ldi r22, lo8(16)
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* X points to Block: dumps the given number of bytes starting at X, with registers saved and restored by precall / postcall */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring); every 32-bit state word is written out with its four bytes reversed
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22	/* X = ctx */
+	movw r30, r24	/* Z = dest */
+	ldi r21, 8	/* eight state words */
+	sbiw r26, 4	/* bias X so that the adiw below lands one past the current word */
+1:	
+	ldi r20, 4	/* four bytes per word */
+	adiw r26, 8	/* X = end of the next word (the inner loop walked 4 bytes backwards) */
+2:	
+		ld r0, -X	/* read the word from its last byte downwards ... */
+		st Z+, r0	/* ... and store it ascending */
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8 	/* reserve a sha256_ctx_t (8*4 state bytes + 8 length bytes) on the stack */
+	sbci r17, 0	
+	in r0, SREG
+	cli
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25		/* keep the destination pointer for sha256_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = address of the ctx (one above the moved SP); inc/adc lost the carry because inc leaves C untouched */
+	sbci r17, hi8(-1)
+	
+	movw r8, r18		/* backup of length (r11:r8) */
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr (r13:r12) */
+	
+	movw r24, r16
+	rcall sha256_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f			/* upper length bytes set: certainly more than one block left */
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f			/* fewer than 512 bits left: go and finalize */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64		/* msg-ptr += 64 bytes, applied to the preserved copy (r23:r22 are clobbered by the call) */
+	add r12, r19
+	adc r13, r1		/* r1 == 0 here, sha256_nextBlock clears it before returning */
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8		/* remaining length (< 512 bits) */
+	rcall sha256_lastBlock
+	
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash	
+	
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 	/* release the ctx */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+;  param3: an 16-bit integer specifying length of block in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02		/* length >= 512 bits? then hash a whole block first */
+	brlo sha256_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* msg pointer += 64 bytes (one block); was +512 bytes by mistake */
+	sbci r23, hi8(-64)
+	rjmp sha256_lastBlock	
+sha256_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length is bit 5 of the byte length */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+	
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18	/* r1 is the copy counter and ends up zero */
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:	
+sha256_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X	/* partial last message byte */
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18
+
+/* checking stuff here */
+	cpi r18, 64-8+1		/* is there still room for the 8 length bytes? */
+	brsh 0f 
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no room for the length: this block is zero-filled and hashed, the length goes into an extra block */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* now we should subtract 512 from length (sha256_nextBlock counted the padding block as message) */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not necessary, the loop above leaves r18 == 0 */
+	/* Z already points to the begin of the block again (restored by the pops above) */
+
+sha256_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + bits of this block, stored most significant byte first */
+	st -Z, r0
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30,  1  ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+/* symbolic names for the register numbers that hold the 32-bit working values (suffix 1 = least significant byte) */
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18	/* old SP, popped again in the epilog */
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ 	adiw r26, 1 ; X++
+ 	ldi r20, 16
+sha256_nextBlock_wcpyloop: 	
+ 	ld r23, Z+
+ 	ld r22, Z+
+ 	ld r19, Z+
+ 	ld r18, Z+
+ 	st X+, r18
+ 	st X+, r19
+ 	st X+, r22	
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
+	} */
+	/* r25,r24,r23,r22 are the function values (r20 = shift count, r21 = scratch of bitrotl/bitrotr)
+	   r19,r18,r17,r16 are the accumulator
+	   r15,r14,r13,r12 are the backup
+	   r11,r10,r9 ,r8  are the xor accu
+	   r1 is the loop counter (LoopC)						*/
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:		 
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; subtract 64 = 16*4
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1	
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store ROTR(w[i-2], 17) in xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2	
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma1 */
+	/* now store the new w[i] */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31	/* peek at the ctx pointer saved in the prolog ... */
+	pop r30
+	push r30	/* ... and put it back for update_state */
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:	
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+	
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; 
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X pointed one past a[7]; now X = &a[0] */
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)		
+	dec r27  /* X - (64*4 == 256), i.e. X = &w[0] */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1	
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */	
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2 
+	rcall bitrotl		/* rotr(x,6) */ 
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3 
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates further 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1 
+	rcall bitrotr 	/* rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did too much x86 asm, i always see 4 32bit regs*/
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+	movw Func1, Bck3 
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+	
+	
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:	
+	/* update state */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3 
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */ 
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21
+	st Z+, r20	
+	clr r21
+sha256_nextBlock_fix_length:	
+	brcc sha256_nextBlock_epilog	/* stop as soon as no carry is left to propagate */
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+	
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+	
+	pop r21	/* old SP as saved in the prolog */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1	/* r1 served as LoopC / scratch, hand it back as zero */
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4 
+	ret
+
+sha256_kv: ; round-key-vector stored in ProgMem: 64 32-bit constants k[0..63], each written as low word, high word (read bytewise with lpm)
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+	
+;###########################################################	
+
+.global sha256_init 
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8	/* 32 bytes of initial state plus the 8 zero bytes that clear the length */
+sha256_init_vloop:	
+	lpm r23, Z+ 
+	st X+, r23
+	dec r22
+	brne sha256_init_vloop
+	ret
+	
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67 
+.word 0xF372, 0x3C6E 
+.word 0xF53A, 0xA54F 
+.word 0x527F, 0x510E 
+.word 0x688C, 0x9B05 
+.word 0xD9AB, 0x1F83 
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000
+.word 0x0000, 0x0000
+
+;###########################################################	
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20 (counted down to zero), r21; the rotated word is left in r25,r24,r23,r22
+rotl32:
+	cpi r20, 8
+	brlo bitrotl	/* fewer than 8 positions left: finish bit by bit */
+	mov r21, r25	/* rotate left by one whole byte */
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl:		/* bitwise part; also called directly by sha256_nextBlock with small counts */
+	clr r21
+	clc
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21		/* r21 collects the bits shifted out of r25 */
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21	/* re-insert the collected bits at the low end */
+	ret
+	
+
+;###########################################################	
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20 (counted down to zero), r21; the rotated word is left in r25,r24,r23,r22
+rotr32:
+	cpi r20, 8
+	brlo bitrotr	/* fewer than 8 positions left: finish bit by bit */
+	mov r21, r22	/* rotate right by one whole byte */
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr:		/* bitwise part; also called directly by sha256_nextBlock with small counts */
+	clr r21
+	clc
+bitrotr_loop:	
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21		/* r21 collects the bits shifted out of r22 */
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21	/* re-insert the collected bits at the high end */
+	ret
+	
+	
+;###########################################################	
+	
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  modifies: r20, r21; the byte-swapped word is left in r25,r24,r23,r22
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25
+	mov r23, r24
+	mov r24, r21	/* old r23 */
+	mov r25, r20 	/* old r22 */
+	ret
+
diff --git a/hmac-sha256/sha256.h b/hmac-sha256/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/hmac-sha256/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__ /* NOTE(review): defined for every includer; name lies in the reserved `__` namespace - presumably selects little-endian code paths in the implementation, confirm */
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];    /**< the eight 32-bit words of the hash state */
+	uint64_t length;  /**< presumably the amount of message data hashed so far; unit not visible here - TODO confirm */
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final, possibly partial, block of message data
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length
+ * (limited only by the 32-bit bit count) with the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/md5-asm.S b/md5-asm.S
deleted file mode 100644
index de3b170..0000000
--- a/md5-asm.S
+++ /dev/null
@@ -1,977 +0,0 @@
-/* md5-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * Author:  Daniel Otte
- * License: GPLv3 or later
- * Date:    2008-11-15
-*/
-
-
-#include "avr-asm-macros.S"
-
-;###########################################################	
-; S-BOX
-
-T_table:
-.hword	0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c 
-.hword	0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44 
-.hword	0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679 
-.hword	0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6 
-.hword	0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1 
-.hword	0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef 
-.hword	0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d 
-.hword	0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf 
-.hword	0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4 
-.hword	0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a 
-.hword	0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef 
-.hword	0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08 
-.hword	0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
-
-
-#define MD5_init_fast
-
-.global md5_init 
-#ifndef MD5_init_fast
-;###########################################################	
-;void md5_init(md5_ctx_t *state)
-; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
-; modifys: Z(r30,r31), X(r25,r26)
-; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
-md5_init:
-	movw r26, r24 ; (24,25) --> (26,27) load X with param1
-	ldi r30, lo8(md5_init_vector)
-	ldi r31, hi8(md5_init_vector)
-	ldi r24, 16+4
-md5_init_vloop:	
-	lpm r0, Z+ 
-	st X+, r0
-	dec r24
-	brne md5_init_vloop
-	ret
-	
-md5_init_vector:
-.hword 0x2301, 0x6745
-.hword 0xAB89, 0xEFCD 
-.hword 0xDCFE, 0x98BA 
-.hword 0x5476, 0x1032 
-.hword 0x0000, 0x0000
-
-#else
-;###########################################################	
-.global md5_init_fast 
-;void md5_init(md5_ctx_t *state)
-; param1: (r24,r25) 16-bit pointer to sha256_ctx_t struct in ram
-; modifys: r23, r22
-; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
-; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
-md5_init:
-md5_init_fast:
-	movw r26, r24
-	ldi r24, 0x01
-	st X+, r24
-	ldi r24, 0x23
-	st X+, r24
-	ldi r24, 0x45
-	st X+, r24
-	ldi r24, 0x67
-	st X+, r24
-	ldi r24, 0x89
-	st X+, r24
-	ldi r24, 0xAB
-	st X+, r24
-	ldi r24, 0xCD
-	st X+, r24
-	ldi r24, 0xEF
-	st X+, r24
-	ldi r24, 0xFE
-	st X+, r24
-	ldi r24, 0xDC
-	st X+, r24
-	ldi r24, 0xBA
-	st X+, r24
-	ldi r24, 0x98
-	st X+, r24
-	ldi r24, 0x76
-	st X+, r24
-	ldi r24, 0x54
-	st X+, r24
-	ldi r24, 0x32
-	st X+, r24
-	ldi r24, 0x10
-	st X+, r24
-	st X+, r1
-	st X+, r1
-	st X+, r1
-	st X+, r1
-	ret
-#endif
-;###########################################################	
-
-/*
-static 
-uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
-	return ((x&y)|((~x)&z));
-}
-*/
-; x: r22-r25
-; y: r18-r21
-; z: r14-r17
-md5_F:
-	and r18, r22
-	and r19, r23
-	and r20, r24
-	and r21, r25
-	com r22
-	com r23
-	com r24
-	com r25
-	and r22, r14
-	and r23, r15
-	and r24, r16
-	and r25, r17
-	or  r22, r18
-	or  r23, r19
-	or  r24, r20
-	or  r25, r21
-	rjmp md5_core_F_exit
-	
-/*
-static
-uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
-	return ((x&z)|((~z)&y));
-}
-*/
-
-; x: r22-r25
-; y: r18-r21
-; z: r14-r17
-md5_G:
-	and r22, r14
-	and r23, r15
-	and r24, r16
-	and r25, r17
-	com r14
-	com r15
-	com r16
-	com r17
-	and r18, r14
-	and r19, r15
-	and r20, r16
-	and r21, r17
-	or  r22, r18
-	or  r23, r19
-	or  r24, r20
-	or  r25, r21
-	rjmp md5_core_F_exit
-/*
-static
-uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
-	return (x^y^z);
-}
-*/
-; x: r22-r25
-; y: r18-r21
-; z: r14-r17
-md5_H:
-	eor r22, r18
-	eor r22, r14
-	eor r23, r19
-	eor r23, r15
-	eor r24, r20
-	eor r24, r16
-	eor r25, r21
-	eor r25, r17
-	rjmp md5_core_F_exit
-/*
-static
-uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
-	return (y ^ (x | (~z)));
-}
-*/
-
-jump_table:
-	rjmp md5_F
-	rjmp md5_G
-	rjmp md5_H
-;	rjmp md5_I
-
-; x: r22-r25
-; y: r18-r21
-; z: r14-r17
-md5_I:
-	com r14
-	com r15
-	com r16
-	com r17
-	or  r22, r14
-	or  r23, r15
-	or  r24, r16
-	or  r25, r17
-	eor r22, r18
-	eor r23, r19
-	eor r24, r20
-	eor r25, r21
-	rjmp md5_core_F_exit
-
-as_table:
-;     (as+0)&3  (as+3)&3  (as+1)&3  (as+2)&3
-;                  Z         X         Y
-;     AS_SAVE0  AS_SAVE1  AS_SAVE2  AS_SAVE3 
-.byte   1*4,      0*4,      2*4,      3*4    ;as=1
-.byte   2*4,      1*4,      3*4,      0*4    ;as=2
-.byte   3*4,      2*4,      0*4,      1*4    ;as=3
-.byte   0*4,      3*4,      1*4,      2*4    ;as=4
-
-;###########################################################	
-.global md5_core
-md5_core:
-	mov r21, r20
-	mov r20, r18
-	mov r19, r16
-	mov r18, r14
-;	rjmp md5_core_asm
-/*
-void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
-	uint32_t t;
-	md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
-	as &= 0x3;
-	/ * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
-	t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
-	a[as]=a[(as+1)&3] + ROTL32(t, s);
-}
-*/
-; a:     r24-r25
-; block: r22-r23
-; as:    r21
-; s:     r20
-; i:     r19
-; fi:    r18
-P_A0 = 24
-P_A1 = 25
-P_B0 = 22
-P_B1 = 23
-P_AS = 21
-P_S  = 20
-P_I  = 19
-P_FI = 18
-
-; x: r22-r25
-; y: r18-r21
-; z: r14-r17
-
-
-AS_SAVE0  =  4
-AS_SAVE1  =  5
-AS_SAVE2  =  6
-AS_SAVE3  =  7
-FI_SAVE   =  8
-S_SAVE    =  9
-ACCU0     = 10
-ACCU1     = 11
-ACCU2     = 12
-ACCU3     = 13
-ARG_X0    = 22
-ARG_X1    = 23
-ARG_X2    = 24
-ARG_X3    = 25
-ARG_Y0    = 18
-ARG_Y1    = 19
-ARG_Y2    = 20
-ARG_Y3    = 21
-ARG_Z0    = 14
-ARG_Z1    = 15
-ARG_Z2    = 16
-ARG_Z3    = 17
-
-
-md5_core_asm:
-	push r16
-	push r17
-	push_range 4, 8
-	ldi r30, lo8(T_table)
-	ldi r31, hi8(T_table)
-	lsl P_I
-	rol r1
-	lsl P_I
-	rol r1
-	add r30, P_I
-	adc r31, r1
-	clr r1
-	mov FI_SAVE, r18
-	/* loading T[i] into ACCU */	
-	lpm ACCU0, Z+	
-	lpm ACCU1, Z+	
-	lpm ACCU2, Z+	
-	lpm ACCU3, Z
-	/* add *block to ACCU */
-	movw r30, P_B0
-	ld r0, Z+
-	add ACCU0, r0
-	ld r0, Z+
-	adc ACCU1, r0
-	ld r0, Z+
-	adc ACCU2, r0
-	ld r0, Z+
-	adc ACCU3, r0
-	/* add a[as+0&3] to ACCU */
-	ldi r30, lo8(as_table)
-	ldi r31, hi8(as_table)
-	dec P_AS
-	andi P_AS, 0x03
-	lsl P_AS
-	lsl P_AS
-	add r30, r21
-	adc r31, r1       ; Z points to the correct row in as_table
-	lpm AS_SAVE0, Z+
-	lpm AS_SAVE1, Z+
-	lpm AS_SAVE2, Z+
-	lpm AS_SAVE3, Z
-	movw r26, r24     ; X points to a[0]
-	add r26, AS_SAVE0
-	adc r27, r1       ; X points at a[as&3]
-	ld r0, X+
-	add ACCU0, r0
-	ld r0, X+
-	adc ACCU1, r0
-	ld r0, X+
-	adc ACCU2, r0
-	ld r0, X+
-	adc ACCU3, r0
-	mov S_SAVE, r20
-
-	movw r28, r24
-	/* loading z value */
-	movw r26, r28
-	add r26, AS_SAVE1
-	adc r27, r1
-	ld ARG_Z0, X+
-	ld ARG_Z1, X+
-	ld ARG_Z2, X+
-	ld ARG_Z3, X
-
-	/* loading x value */
-	movw r26, r28	
-	add r26, AS_SAVE2
-	adc r27, r1
-	ld ARG_X0, X+
-	ld ARG_X1, X+
-	ld ARG_X2, X+
-	ld ARG_X3, X
-
-	/* loading y value */
-	movw r26, r28
-	add r26, AS_SAVE3
-	adc r27, r1
-	ldi r30, pm_lo8(jump_table)
-	ldi r31, pm_hi8(jump_table)
-	add r30, FI_SAVE
-	adc r31, r1    ; Z points to the correct entry in our jump table
-	ld ARG_Y0, X+
-	ld ARG_Y1, X+
-	ld ARG_Y2, X+
-	ld ARG_Y3, X
-
-	ijmp /* calls the function pointed by Z */
-md5_core_F_exit:		
-
-	/* add ACCU to result of f() */
-	add r22, ACCU0
-	adc r23, ACCU1
-	adc r24, ACCU2
-	adc r25, ACCU3
-
-	/* rotate */
-	mov r20, S_SAVE
-rotl32:
-	cpi r20, 8
-	brlo bitrotl
-	mov r21, r25
-	mov r25, r24
-	mov r24, r23
-	mov r23, r22
-	mov r22, r21
-	subi r20, 8
-	rjmp rotl32
-bitrotl:
-	mov r21, r25
-bitrotl_loop:	
-	tst r20
-	breq fixrotl
-bitrotl_loop2:	
-	lsl r21
-	rol r22
-	rol r23
-	rol r24
-	rol r25
-	dec r20
-	brne bitrotl_loop2
-fixrotl:
-
-	/* add a[(as+1)&3]  */
-	movw r26, r28
-	add r26, AS_SAVE2
-	adc r27, r1
-	ld r0, X+
-	add r22, r0
-	ld r0, X+
-	adc r23, r0
-	ld r0, X+
-	adc r24, r0
-	ld r0, X
-	adc r25, r0
-
-	/* store result */
-	movw r26, r28
-	add r26, AS_SAVE0
-	adc r27, r1
-	st X+, r22
-	st X+, r23
-	st X+, r24
-	st X , r25	
-md5_core_exit:
-	pop_range 4, 8
-	pop r17
-	pop r16
-	ret
-
-;###################################################################
-/*
-void md5_nextBlock(md5_ctx_t *state, void* block){
-	uint32_t	a[4];
-	uint8_t		m,n,i=0;
-
-	a[0]=state->a[0];
-	a[1]=state->a[1];
-	a[2]=state->a[2];
-	a[3]=state->a[3];
-	
-	/ * round 1 * /
-	uint8_t s1t[]={7,12,17,22}; // 1,-1   1,4   2,-1   3,-2
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
-		}
-	}
-	/ * round 2 * /
-	uint8_t s2t[]={5,9,14,20}; // 1,-3   1,1   2,-2   2,4
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
-		}
-	}
-	/ * round 3 * /
-	uint8_t s3t[]={4,11,16,23}; // 0,4   1,3   2,0   3,-1
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
-		}
-	}
-	/ * round 4 * /
-	uint8_t s4t[]={6,10,15,21}; // 1,-2   1,2   2,-1   3,-3
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
-		}
-	}
-	state->a[0] += a[0];
-	state->a[1] += a[1];
-	state->a[2] += a[2];
-	state->a[3] += a[3];
-	state->counter++;
-}
-*/
-
-shift_table_1:  .byte  7,12,17,22
-shift_table_2:  .byte  5, 9,14,20
-shift_table_3:  .byte  4,11,16,23
-shift_table_4:  .byte  6,10,15,21
-
-index_table_r2:
-;(1+m*4+n*5)&0xf:
-        .byte 0x04, 0x18, 0x2c, 0x00 
-        .byte 0x14, 0x28, 0x3c, 0x10 
-        .byte 0x24, 0x38, 0x0c, 0x20 
-        .byte 0x34, 0x08, 0x1c, 0x30 
-
-index_table_r3:
-;(5-m*4+n*3)&0xf:
-        .byte 0x14, 0x20, 0x2c, 0x38 
-        .byte 0x04, 0x10, 0x1c, 0x28 
-        .byte 0x34, 0x00, 0x0c, 0x18 
-        .byte 0x24, 0x30, 0x3c, 0x08 
-
-index_table_r4:
-;(0-m*4+n*7)&0xf:
-        .byte 0x00, 0x1c, 0x38, 0x14 
-        .byte 0x30, 0x0c, 0x28, 0x04 
-        .byte 0x20, 0x3c, 0x18, 0x34 
-        .byte 0x10, 0x2c, 0x08, 0x24
-
-APTR_REG = 2
-BPTR_REG = 4
-N_REG = 6
-M_REG = 7
-I_REG = 8
-.global md5_nextBlock
-md5_nextBlock:
-	stack_alloc 16
-	push_range 2, 17
-	push r28
-	push r29
-	push r24
-	push r25
-	adiw r30, 1 /* Z now points to the beginning of the allocated memory */
-	movw r2, r30
-	movw r4, r22
-	movw r26, r24
-	ldi r20, 16
-1:
-	ld r0, X+
-	st Z+, r0
-	dec r20
-	brne 1b
-	/* state now copied to stack memory */
-	clr I_REG	
-	/* Round 1 */
-	clr M_REG
-	ldi r17, 4
-1:
-	clr N_REG	
-	ldi r16, 4
-2:
-	movw r24, APTR_REG
-	movw r22, BPTR_REG 
-	mov r0, M_REG
-	lsl r0
-	lsl r0
-	add r0, N_REG
-	lsl r0
-	lsl r0
-	add r22, r0
-	adc r23, r1
-	mov r21, r16	
-	ldi r30, lo8(shift_table_1)
-	ldi r31, hi8(shift_table_1)
-	add r30, N_REG
-	adc r31, r1
-	lpm r20, Z
-	mov r19, I_REG
-	ldi r18, 0
-	rcall md5_core_asm
-	inc I_REG
-	inc N_REG
-	dec r16
-	brne 2b
-	inc M_REG
-	dec r17
-	brne 1b
-	
-	/* Round 2 */
-	clr M_REG
-	ldi r17, 4
-1:
-	clr N_REG	
-	ldi r16, 4
-2:
-	movw r24, APTR_REG
-	movw r22, BPTR_REG 
-	ldi r30, lo8(index_table_r2)
-	ldi r31, hi8(index_table_r2)
-	mov r0, M_REG
-	lsl r0
-	lsl r0
-	add r0, N_REG
-	add r30, r0
-	adc r31, r1
-	lpm r0, Z	
-	add r22, r0
-	adc r23, r1
-	mov r21, r16	
-	ldi r30, lo8(shift_table_2)
-	ldi r31, hi8(shift_table_2)
-	add r30, N_REG
-	adc r31, r1
-	lpm r20, Z
-	mov r19, I_REG
-	ldi r18, 1
-	rcall md5_core_asm
-	inc I_REG
-	inc N_REG
-	dec r16
-	brne 2b
-	inc M_REG
-	dec r17
-	brne 1b
-
-	/* Round 3 */
-	clr M_REG
-	ldi r17, 4
-1:
-	clr N_REG	
-	ldi r16, 4
-2:
-	movw r24, APTR_REG
-	movw r22, BPTR_REG 
-	ldi r30, lo8(index_table_r3)
-	ldi r31, hi8(index_table_r3)
-	mov r0, M_REG
-	lsl r0
-	lsl r0
-	add r0, N_REG
-	add r30, r0
-	adc r31, r1
-	lpm r0, Z	
-	add r22, r0
-	adc r23, r1
-	mov r21, r16	
-	ldi r30, lo8(shift_table_3)
-	ldi r31, hi8(shift_table_3)
-	add r30, N_REG
-	adc r31, r1
-	lpm r20, Z
-	mov r19, I_REG
-	ldi r18, 2
-	rcall md5_core_asm
-	inc I_REG
-	inc N_REG
-	dec r16
-	brne 2b
-	inc M_REG
-	dec r17
-	brne 1b
-
-	/* Round 4 */
-	clr M_REG
-	ldi r17, 4
-1:
-	clr N_REG	
-	ldi r16, 4
-2:
-	movw r24, APTR_REG
-	movw r22, BPTR_REG 
-	ldi r30, lo8(index_table_r4)
-	ldi r31, hi8(index_table_r4)
-	mov r0, M_REG
-	lsl r0
-	lsl r0
-	add r0, N_REG
-	add r30, r0
-	adc r31, r1
-	lpm r0, Z	
-	add r22, r0
-	adc r23, r1
-	mov r21, r16	
-	ldi r30, lo8(shift_table_4)
-	ldi r31, hi8(shift_table_4)
-	add r30, N_REG
-	adc r31, r1
-	lpm r20, Z
-	mov r19, I_REG
-	ldi r18, 3
-	rcall md5_core_asm
-	inc I_REG
-	inc N_REG
-	dec r16
-	brne 2b
-	inc M_REG
-	dec r17
-	brne 1b
-
-
-	pop r27
-	pop r26 /* X now points to the context */
-	movw r30, APTR_REG
-	ldi r16, 4
-1:
-	ld r0, X
-	ld r2, Z+
-	add r0, r2
-	st X+, r0	
-	ld r0, X
-	ld r2, Z+
-	adc r0, r2
-	st X+, r0	
-	ld r0, X
-	ld r2, Z+
-	adc r0, r2
-	st X+, r0	
-	ld r0, X
-	ld r2, Z+
-	adc r0, r2
-	st X+, r0	
-	dec r16
-	brne 1b
-
-	ld r0, X
-	inc r0
-	st X+, r0	
-	brne 2f
-	ld r0, X
-	inc r0
-	st X+, r0	
-	brne 2f
-	ld r0, X
-	inc r0
-	st X+, r0	
-	brne 2f	
-	ld r0, X
-	inc r0
-	st X+, r0	
-2:			
-
-	pop r29
-	pop r28
-	pop_range 2, 17
-	stack_free 16
-	ret
-
-;###############################################################################
-/*
-void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
-	uint16_t l;
-	uint8_t b[64];
-	while (length_b >= 512){
-		md5_nextBlock(state, block);
-		length_b -= 512;
-		block = ((uint8_t*)block) + 512/8;
-	}
-	memset(b, 0, 64);
-	memcpy(b, block, length_b/8);
-	/ * insert padding one * /
-	l=length_b/8;
-	if(length_b%8){
-		uint8_t t;
-		t = ((uint8_t*)block)[l];
-		t |= (0x80>>(length_b%8));
-		b[l]=t;
-	}else{
-		b[l]=0x80;
-	}
-	/ * insert length value * /
-	if(l+sizeof(uint64_t) >= 512/8){
-		md5_nextBlock(state, b);
-		state->counter--;
-		memset(b, 0, 64-8);
-	}
-	*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
-	md5_nextBlock(state, b);
-}
-*/
-; state_ptr : r24,r25
-; block_ptr : r22,r23
-; length_b  : r20,r21
-.global md5_lastBlock
-md5_lastBlock:
-	stack_alloc_large 64
-	push_range 12, 17
-	push r30
-	push r31
-	movw r16, r20 /* length_b  */ 
-	movw r14, r22 /* block_ptr */
-	movw r12, r24 /* state_ptr */
-	ldi r18, 64
-2:
-	cpi r17, 2 /* hi8(512) */	
-	brlo 2f
-1:
-	movw r24, r12
-	movw r22, r14
-	rcall md5_nextBlock
-	add r14, r18
-	adc r15, r1
-	subi r17, 2
-	rjmp 2b
-2:
-	pop r31
-	pop r30
-
-	adiw r30, 1 /* adjust Z to point to buffer */
-	movw r26, r14
-	movw r24, r16
-	adiw r24, 7
-
-	lsr r25
-	ror r24
-	lsr r25
-	ror r24
-	lsr r24 /* r24 now holds how many bytes are to copy */
-    ldi r18, 64
-	sub r18, r24 /* r18 will hold the amount of used bytes in buffer */
-	tst r24
-4:
-	breq 5f
-	ld r0, X+
-	st Z+, r0 
-	dec r24
-	rjmp 4b /* Z points to the byte after msg in buffer */
-5:	/* append 1-bit */
-	mov r20, r16
-	ldi r19, 0x80
-	andi r20, 0x07
-	brne bit_fucking
-	st Z+, r19
-	dec r18 /* 'allocate' another byte in buffer */
-	rjmp after_bit_fucking
-bit_fucking:
-1:
-	lsr r19
-	dec r20
-	brne 1b
-	or r0, r19
-	st -Z, r0
-    adiw r30, 1
-after_bit_fucking:
-	clt	
-	cpi r18, 8
-	brmi 2f
-	set         /* store in t if the counter will also fit in this block (1 if fit)*/
-2:
-	tst r18
-	breq 2f
-1: /* fill remaning buffer with zeros */
-	st Z+, r1
-	dec r18
-	brne 1b
-2:
-	sbiw r30, 63
-	sbiw r30,  1
-	movw r14, r30 /* r14:r15 now points to buffer */	
-	brts load_counter
-	/* counter does not fit, finalize this block */
-	movw r24, r12
-	movw r22, r14
-	rcall md5_nextBlock
-	movw r30, r14
-	ldi r20, 64-8
-3:
-	st Z+, r1
-	dec r20
-	brne 3b
-	
-load_counter:		
-	movw r26, r12 /* X points to state */
-	adiw r26, 16
-	ld r19, X+
-	ld r20, X+
-	ld r21, X+
-	ld r22, X+
-	brts post_counter_decrement	/* do not decremen because counter fits */
-counter_decrement:
-	subi r19, 1
-	sbci r20, 0
-	sbci r21, 0
-	sbci r22, 0
-post_counter_decrement:
-	clr r18
-	clr r23
-	lsl r19
-	rol r20
-	rol r21
-	rol r22
-	rol r23
-	mov r18, r16 /* r16:r17 length_b */
-	add r19, r17
-	adc r20, r1
-	adc r21, r1
-	adc r22, r1
-	adc r23, r1
-	movw r30, r14
-	adiw r30, 64-8
-	st Z+, r18
-	st Z+, r19
-	st Z+, r20
-	st Z+, r21
-	st Z+, r22
-	st Z+, r23
-	st Z+, r1
-	st Z, r1
-
-	sbiw r30, 63
-;	sbiw r30, 1
-	movw r24, r12
-	movw r22, r30
-	rcall md5_nextBlock
-md5_lastBlock_exit:	
-	pop_range 12, 17
-	stack_free_large 64
-	ret
-
-
-;###############################################################################
-
-
-.global md5_ctx2hash
-md5_ctx2hash:
-	movw r26, r24
-	movw r30, r22
-	ldi r22, 16
-1:
-	ld r0, Z+
-	st X+, r0
-	dec r22
-	brne 1b	
-	ret
-
-
-;###############################################################################
-
-
-.global md5
-md5:
-	stack_alloc 20
-	push_range  8, 17
-	adiw r30, 1
-	movw  r8, r30 /* ctx           */
-	movw r10, r24 /* dest          */
-	movw r12, r22 /* msg           */
-	movw r14, r18 /* length (low)  */
-	movw r16, r20 /* length (high) */
-	movw r24, r30
-	rcall md5_init
-1:
-	tst r16
-	brne next_round
-	tst r17
-	breq last_round
-next_round:
-	movw r24,  r8
-	movw r22, r12
-	rcall md5_nextBlock
-	ldi r22, 64
-	add r12, r22
-	adc r13, r1
-	ldi r22, 2
-	sub r15, r22
-	sbci r16, 0
-	sbci r17, 0
-	rjmp 1b
-last_round:		
-	movw r24, r8
-	movw r22, r12
-	movw r20, r14
-	rcall md5_lastBlock
-	movw r24, r10
-	movw r22,  r8
-	rcall md5_ctx2hash
-	pop_range  8, 17
-	stack_free 20
-	ret
-
-
-
diff --git a/md5.c b/md5.c
deleted file mode 100644
index 003699e..0000000
--- a/md5.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/* md5.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * \file	md5.c
- * \author	Daniel Otte
- * \date 	2006-07-31
- * \license GPLv3 or later
- * \brief   Implementation of the MD5 hash algorithm as described in RFC 1321
- * 
- */
-
- #include "md5.h"
- #include "md5_sbox.h"
- #include "cli.h" 
- #include <stdint.h>
- #include <string.h>
- 
- #undef DEBUG
- 
-void md5_init(md5_ctx_t *s){
-	s->counter = 0;
-	s->a[0] = 0x67452301;
-	s->a[1] = 0xefcdab89;
-	s->a[2] = 0x98badcfe;
-	s->a[3] = 0x10325476;
-}
-
-static 
-uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
-	return ((x&y)|((~x)&z));
-}
-
-static
-uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
-	return ((x&z)|((~z)&y));
-}
-
-static
-uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
-	return (x^y^z);
-}
-
-static
-uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
-	return (y ^ (x | (~z)));
-}
-
-typedef uint32_t md5_func_t(uint32_t, uint32_t, uint32_t);
-
-#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))  
-
-static
-void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
-	uint32_t t;
-	md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
-	as &= 0x3;
-	/* a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
-#ifdef DEBUG
-	char funcc[]={'*', '-', '+', '~'};
-	cli_putstr("\r\n DBG: md5_core [");
-	cli_putc(funcc[fi]);
-	cli_hexdump(&as, 1); cli_putc(' ');
-	cli_hexdump(&k, 1); cli_putc(' ');
-	cli_hexdump(&s, 1); cli_putc(' ');
-	cli_hexdump(&i, 1); cli_putc(']');
-#endif	
-	t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) 
-	    + *((uint32_t*)block) + pgm_read_dword(md5_T+i) ;
-	a[as]=a[(as+1)&3] + ROTL32(t, s);
-}
-
-void md5_nextBlock(md5_ctx_t *state, const void* block){
-	uint32_t	a[4];
-	uint8_t		m,n,i=0;
-	/* this requires other mixed sboxes */
-#ifdef DEBUG
-	cli_putstr("\r\n DBG: md5_nextBlock: block:\r\n");
-	cli_hexdump(block, 16);	cli_putstr("\r\n");
-	cli_hexdump(block+16, 16);	cli_putstr("\r\n");
-	cli_hexdump(block+32, 16);	cli_putstr("\r\n");
-	cli_hexdump(block+48, 16);	cli_putstr("\r\n");
-#endif	
-	
-	a[0]=state->a[0];
-	a[1]=state->a[1];
-	a[2]=state->a[2];
-	a[3]=state->a[3];
-	
-	/* round 1 */
-	uint8_t s1t[]={7,12,17,22}; // 1,-1   1,4   2,-1   3,-2
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
-		}
-	}
-	/* round 2 */
-	uint8_t s2t[]={5,9,14,20}; // 1,-3   1,1   2,-2   2,4
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
-		}
-	}
-	/* round 3 */
-	uint8_t s3t[]={4,11,16,23}; // 0,4   1,3   2,0   3,-1
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
-		}
-	}
-	/* round 4 */
-	uint8_t s4t[]={6,10,15,21}; // 1,-2   1,2   2,-1   3,-3
-	for(m=0;m<4;++m){
-		for(n=0;n<4;++n){
-			md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
-		}
-	}
-	state->a[0] += a[0];
-	state->a[1] += a[1];
-	state->a[2] += a[2];
-	state->a[3] += a[3];
-	state->counter++;
-}
-
-void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
-	uint16_t l;
-	uint8_t b[64];
-	while (length_b >= 512){
-		md5_nextBlock(state, block);
-		length_b -= 512;
-		block = ((uint8_t*)block) + 512/8;
-	}
-	memset(b, 0, 64);
-	memcpy(b, block, length_b/8);
-	/* insert padding one */
-	l=length_b/8;
-	if(length_b%8){
-		uint8_t t;
-		t = ((uint8_t*)block)[l];
-		t |= (0x80>>(length_b%8));
-		b[l]=t;
-	}else{
-		b[l]=0x80;
-	}
-	/* insert length value */
-	if(l+sizeof(uint64_t) >= 512/8){
-		md5_nextBlock(state, b);
-		state->counter--;
-		memset(b, 0, 64-8);
-	}
-	*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
-	md5_nextBlock(state, b);
-}
-
-void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){
-	memcpy(dest, state->a, MD5_HASH_BYTES);
-}
-
-void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){
-	md5_ctx_t ctx;
-	md5_init(&ctx);
-	while(length_b>=MD5_BLOCK_BITS){
-		md5_nextBlock(&ctx, msg);
-		msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
-		length_b -= MD5_BLOCK_BITS;
-	}
-	md5_lastBlock(&ctx, msg, length_b);
-	md5_ctx2hash(dest, &ctx);
-}
-
diff --git a/md5.h b/md5.h
deleted file mode 100644
index 6b65c4a..0000000
--- a/md5.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* md5.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * File:	md5.h
- * Author:	Daniel Otte
- * Date: 	31.07.2006
- * License: GPL
- * Description: Implementation of the MD5 hash algorithm as described in RFC 1321
- * 
- */
-
-
-#ifndef MD5_H_
-#define MD5_H_
-
-#include <stdint.h>
-
-
-#define MD5_HASH_BITS  128
-#define MD5_HASH_BYTES (MD5_HASH_BITS/8)
-#define MD5_BLOCK_BITS 512
-#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8)
-
-
-typedef struct md5_ctx_st {
-	uint32_t a[4];
-	uint32_t counter;
-} md5_ctx_t;
-
-typedef uint8_t md5_hash_t[MD5_HASH_BYTES];
-
- 
-void md5_init(md5_ctx_t *s);
-void md5_nextBlock(md5_ctx_t *state, const void* block);
-void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length);
-void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state);
-void md5(md5_hash_t* dest, const void* msg, uint32_t length_b);
-
-#endif /*MD5_H_*/
diff --git a/md5/md5-asm.S b/md5/md5-asm.S
new file mode 100644
index 0000000..de3b170
--- /dev/null
+++ b/md5/md5-asm.S
@@ -0,0 +1,977 @@
+/* md5-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:  Daniel Otte
+ * License: GPLv3 or later
+ * Date:    2008-11-15
+*/
+
+
+#include "avr-asm-macros.S"
+
+;###########################################################	
+; S-BOX
+
+T_table:
+.hword	0xa478, 0xd76a, 0xb756, 0xe8c7, 0x70db, 0x2420, 0xceee, 0xc1bd, 0x0faf, 0xf57c 
+.hword	0xc62a, 0x4787, 0x4613, 0xa830, 0x9501, 0xfd46, 0x98d8, 0x6980, 0xf7af, 0x8b44 
+.hword	0x5bb1, 0xffff, 0xd7be, 0x895c, 0x1122, 0x6b90, 0x7193, 0xfd98, 0x438e, 0xa679 
+.hword	0x0821, 0x49b4, 0x2562, 0xf61e, 0xb340, 0xc040, 0x5a51, 0x265e, 0xc7aa, 0xe9b6 
+.hword	0x105d, 0xd62f, 0x1453, 0x0244, 0xe681, 0xd8a1, 0xfbc8, 0xe7d3, 0xcde6, 0x21e1 
+.hword	0x07d6, 0xc337, 0x0d87, 0xf4d5, 0x14ed, 0x455a, 0xe905, 0xa9e3, 0xa3f8, 0xfcef 
+.hword	0x02d9, 0x676f, 0x4c8a, 0x8d2a, 0x3942, 0xfffa, 0xf681, 0x8771, 0x6122, 0x6d9d 
+.hword	0x380c, 0xfde5, 0xea44, 0xa4be, 0xcfa9, 0x4bde, 0x4b60, 0xf6bb, 0xbc70, 0xbebf 
+.hword	0x7ec6, 0x289b, 0x27fa, 0xeaa1, 0x3085, 0xd4ef, 0x1d05, 0x0488, 0xd039, 0xd9d4 
+.hword	0x99e5, 0xe6db, 0x7cf8, 0x1fa2, 0x5665, 0xc4ac, 0x2244, 0xf429, 0xff97, 0x432a 
+.hword	0x23a7, 0xab94, 0xa039, 0xfc93, 0x59c3, 0x655b, 0xcc92, 0x8f0c, 0xf47d, 0xffef 
+.hword	0x5dd1, 0x8584, 0x7e4f, 0x6fa8, 0xe6e0, 0xfe2c, 0x4314, 0xa301, 0x11a1, 0x4e08 
+.hword	0x7e82, 0xf753, 0xf235, 0xbd3a, 0xd2bb, 0x2ad7, 0xd391, 0xeb86
+
+
+#define MD5_init_fast
+
+.global md5_init 
+#ifndef MD5_init_fast
+;###########################################################	
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifies: Z(r30,r31), X(r26,r27), r24, r0
+; size = 9+5*4 WORDS = 29 WORDS = 58 Bytes
+md5_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8(md5_init_vector)
+	ldi r31, hi8(md5_init_vector)
+	ldi r24, 16+4
+md5_init_vloop:	
+	lpm r0, Z+ 
+	st X+, r0
+	dec r24
+	brne md5_init_vloop
+	ret
+	
+md5_init_vector:
+.hword 0x2301, 0x6745
+.hword 0xAB89, 0xEFCD 
+.hword 0xDCFE, 0x98BA 
+.hword 0x5476, 0x1032 
+.hword 0x0000, 0x0000
+
+#else
+;###########################################################	
+.global md5_init_fast 
+;void md5_init(md5_ctx_t *state)
+; param1: (r24,r25) 16-bit pointer to md5_ctx_t struct in ram
+; modifies: r24, X(r26,r27)
+; cycles = 1+16*3+4*2+4 = 1+48+12 = 61
+; size = 1+16*2+4+1 WORDS = 38 WORDS = 76 Bytes
+md5_init:
+md5_init_fast:
+	movw r26, r24   /* X = state */
+	ldi r24, 0x01   /* a[0] = 0x67452301, written low byte first */
+	st X+, r24
+	ldi r24, 0x23
+	st X+, r24
+	ldi r24, 0x45
+	st X+, r24
+	ldi r24, 0x67
+	st X+, r24
+	ldi r24, 0x89   /* a[1] = 0xefcdab89 */
+	st X+, r24
+	ldi r24, 0xAB
+	st X+, r24
+	ldi r24, 0xCD
+	st X+, r24
+	ldi r24, 0xEF
+	st X+, r24
+	ldi r24, 0xFE   /* a[2] = 0x98badcfe */
+	st X+, r24
+	ldi r24, 0xDC
+	st X+, r24
+	ldi r24, 0xBA
+	st X+, r24
+	ldi r24, 0x98
+	st X+, r24
+	ldi r24, 0x76   /* a[3] = 0x10325476 */
+	st X+, r24
+	ldi r24, 0x54
+	st X+, r24
+	ldi r24, 0x32
+	st X+, r24
+	ldi r24, 0x10
+	st X+, r24
+	st X+, r1       /* 4 bytes after a[]: stores r1 (assumed to hold zero) */
+	st X+, r1
+	st X+, r1
+	st X+, r1
+	ret
+#endif
+;###########################################################	
+
+/*
+static 
+uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){
+	return ((x&y)|((~x)&z));
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_F:              /* computes (x&y)|((~x)&z); the result replaces x in r22-r25 */
+	and r18, r22    /* y = x & y (y is overwritten) */
+	and r19, r23
+	and r20, r24
+	and r21, r25
+	com r22         /* x = ~x */
+	com r23
+	com r24
+	com r25
+	and r22, r14    /* x = (~x) & z */
+	and r23, r15
+	and r24, r16
+	and r25, r17
+	or  r22, r18    /* x = ((~x)&z) | (x&y) */
+	or  r23, r19
+	or  r24, r20
+	or  r25, r21
+	rjmp md5_core_F_exit /* entered by ijmp, so it leaves by jumping back instead of ret */
+	
+/*
+static
+uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){
+	return ((x&z)|((~z)&y));
+}
+*/
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_G:              /* computes (x&z)|((~z)&y); the result replaces x in r22-r25 */
+	and r22, r14    /* x = x & z */
+	and r23, r15
+	and r24, r16
+	and r25, r17
+	com r14         /* z = ~z (z is overwritten) */
+	com r15
+	com r16
+	com r17
+	and r18, r14    /* y = (~z) & y (y is overwritten) */
+	and r19, r15
+	and r20, r16
+	and r21, r17
+	or  r22, r18    /* x = (x&z) | ((~z)&y) */
+	or  r23, r19
+	or  r24, r20
+	or  r25, r21
+	rjmp md5_core_F_exit /* entered by ijmp, so it leaves by jumping back instead of ret */
+/*
+static
+uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){
+	return (x^y^z);
+}
+*/
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_H:              /* computes x^y^z byte by byte; the result replaces x in r22-r25 */
+	eor r22, r18
+	eor r22, r14
+	eor r23, r19
+	eor r23, r15
+	eor r24, r20
+	eor r24, r16
+	eor r25, r21
+	eor r25, r17
+	rjmp md5_core_F_exit /* entered by ijmp, so it leaves by jumping back instead of ret */
+/*
+static
+uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){
+	return (y ^ (x | (~z)));
+}
+*/
+
+jump_table:         /* md5_core_asm adds fi to the word address of this label and does ijmp */
+	rjmp md5_F      /* fi = 0 */
+	rjmp md5_G      /* fi = 1 */
+	rjmp md5_H      /* fi = 2; fi = 3 lands on the first instruction of md5_I below */
+;	rjmp md5_I
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+md5_I:              /* computes y ^ (x | (~z)); the result replaces x in r22-r25 */
+	com r14         /* z = ~z (z is overwritten) */
+	com r15
+	com r16
+	com r17
+	or  r22, r14    /* x = x | (~z) */
+	or  r23, r15
+	or  r24, r16
+	or  r25, r17
+	eor r22, r18    /* x = y ^ (x | (~z)) */
+	eor r23, r19
+	eor r24, r20
+	eor r25, r21
+	rjmp md5_core_F_exit /* entered by ijmp, so it leaves by jumping back instead of ret */
+
+as_table:
+;     (as+0)&3  (as+3)&3  (as+1)&3  (as+2)&3
+;                  Z         X         Y
+;     AS_SAVE0  AS_SAVE1  AS_SAVE2  AS_SAVE3 
+.byte   1*4,      0*4,      2*4,      3*4    ;as=1
+.byte   2*4,      1*4,      3*4,      0*4    ;as=2
+.byte   3*4,      2*4,      0*4,      1*4    ;as=3
+.byte   0*4,      3*4,      1*4,      2*4    ;as=4
+
+;###########################################################	
+.global md5_core
+md5_core:           /* exported entry: repacks four byte arguments, then runs on into md5_core_asm */
+	mov r21, r20    /* as: r20 -> r21 */
+	mov r20, r18    /* s:  r18 -> r20 */
+	mov r19, r16    /* i:  r16 -> r19 */
+	mov r18, r14    /* fi: r14 -> r18 */
+;	rjmp md5_core_asm
+/*
+void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){
+	uint32_t t;
+	md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I};
+	as &= 0x3;
+	/ * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). * /
+	t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) + *((uint32_t*)block) + md5_T[i] ;
+	a[as]=a[(as+1)&3] + ROTL32(t, s);
+}
+*/
+; a:     r24-r25
+; block: r22-r23
+; as:    r21
+; s:     r20
+; i:     r19
+; fi:    r18
+P_A0 = 24
+P_A1 = 25
+P_B0 = 22
+P_B1 = 23
+P_AS = 21
+P_S  = 20
+P_I  = 19
+P_FI = 18
+
+; x: r22-r25
+; y: r18-r21
+; z: r14-r17
+
+
+AS_SAVE0  =  4
+AS_SAVE1  =  5
+AS_SAVE2  =  6
+AS_SAVE3  =  7
+FI_SAVE   =  8
+S_SAVE    =  9
+ACCU0     = 10
+ACCU1     = 11
+ACCU2     = 12
+ACCU3     = 13
+ARG_X0    = 22
+ARG_X1    = 23
+ARG_X2    = 24
+ARG_X3    = 25
+ARG_Y0    = 18
+ARG_Y1    = 19
+ARG_Y2    = 20
+ARG_Y3    = 21
+ARG_Z0    = 14
+ARG_Z1    = 15
+ARG_Z2    = 16
+ARG_Z3    = 17
+
+
+md5_core_asm:       /* in: r24:r25=a, r22:r23=pointer to block word, r21=as, r20=s, r19=i, r18=fi */
+	push r16
+	push r17
+	push_range 4, 8 /* NOTE(review): r9-r15 and r28:r29 are written below but not saved here; avr-gcc treats them as call-saved - confirm for callers other than md5_nextBlock */
+	ldi r30, lo8(T_table)
+	ldi r31, hi8(T_table)
+	lsl P_I         /* Z = T_table + 4*i; r1 (assumed zero on entry) takes the bits shifted out */
+	rol r1
+	lsl P_I
+	rol r1
+	add r30, P_I
+	adc r31, r1
+	clr r1          /* put r1 back to zero */
+	mov FI_SAVE, r18
+	/* loading T[i] into ACCU */	
+	lpm ACCU0, Z+	
+	lpm ACCU1, Z+	
+	lpm ACCU2, Z+	
+	lpm ACCU3, Z
+	/* add *block to ACCU */
+	movw r30, P_B0
+	ld r0, Z+
+	add ACCU0, r0
+	ld r0, Z+
+	adc ACCU1, r0
+	ld r0, Z+
+	adc ACCU2, r0
+	ld r0, Z+
+	adc ACCU3, r0
+	/* add a[as+0&3] to ACCU */
+	ldi r30, lo8(as_table)
+	ldi r31, hi8(as_table)
+	dec P_AS        /* row = ((as-1)&3); each as_table row is 4 bytes */
+	andi P_AS, 0x03
+	lsl P_AS
+	lsl P_AS
+	add r30, r21
+	adc r31, r1       ; Z points to the correct row in as_table
+	lpm AS_SAVE0, Z+
+	lpm AS_SAVE1, Z+
+	lpm AS_SAVE2, Z+
+	lpm AS_SAVE3, Z
+	movw r26, r24     ; X points to a[0]
+	add r26, AS_SAVE0
+	adc r27, r1       ; X points at a[as&3]
+	ld r0, X+
+	add ACCU0, r0
+	ld r0, X+
+	adc ACCU1, r0
+	ld r0, X+
+	adc ACCU2, r0
+	ld r0, X+
+	adc ACCU3, r0
+	mov S_SAVE, r20
+
+	movw r28, r24   /* keep the a pointer in Y: r24:r25 is overwritten when x is loaded */
+	/* loading z value */
+	movw r26, r28
+	add r26, AS_SAVE1
+	adc r27, r1
+	ld ARG_Z0, X+
+	ld ARG_Z1, X+
+	ld ARG_Z2, X+
+	ld ARG_Z3, X
+
+	/* loading x value */
+	movw r26, r28	
+	add r26, AS_SAVE2
+	adc r27, r1
+	ld ARG_X0, X+
+	ld ARG_X1, X+
+	ld ARG_X2, X+
+	ld ARG_X3, X
+
+	/* loading y value */
+	movw r26, r28
+	add r26, AS_SAVE3
+	adc r27, r1
+	ldi r30, pm_lo8(jump_table)
+	ldi r31, pm_hi8(jump_table)
+	add r30, FI_SAVE
+	adc r31, r1    ; Z points to the correct entry in our jump table
+	ld ARG_Y0, X+
+	ld ARG_Y1, X+
+	ld ARG_Y2, X+
+	ld ARG_Y3, X
+
+	ijmp /* jumps to the round function selected by Z; each one comes back via rjmp md5_core_F_exit */
+md5_core_F_exit:		
+
+	/* add ACCU to result of f() */
+	add r22, ACCU0
+	adc r23, ACCU1
+	adc r24, ACCU2
+	adc r25, ACCU3
+
+	/* rotate */
+	mov r20, S_SAVE
+rotl32:
+	cpi r20, 8
+	brlo bitrotl
+	mov r21, r25    /* rotate left by one whole byte while at least 8 bits remain */
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl:
+	mov r21, r25    /* r21 = copy of the top byte; its bits are fed back in at the bottom */
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+bitrotl_loop2:	
+	lsl r21
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	dec r20
+	brne bitrotl_loop2
+fixrotl:
+
+	/* add a[(as+1)&3]  */
+	movw r26, r28
+	add r26, AS_SAVE2
+	adc r27, r1
+	ld r0, X+
+	add r22, r0
+	ld r0, X+
+	adc r23, r0
+	ld r0, X+
+	adc r24, r0
+	ld r0, X
+	adc r25, r0
+
+	/* store result */
+	movw r26, r28
+	add r26, AS_SAVE0
+	adc r27, r1
+	st X+, r22
+	st X+, r23
+	st X+, r24
+	st X , r25	
+md5_core_exit:
+	pop_range 4, 8
+	pop r17
+	pop r16
+	ret
+
+;###################################################################
+/*
+void md5_nextBlock(md5_ctx_t *state, void* block){
+	uint32_t	a[4];
+	uint8_t		m,n,i=0;
+
+	a[0]=state->a[0];
+	a[1]=state->a[1];
+	a[2]=state->a[2];
+	a[3]=state->a[3];
+	
+	/ * round 1 * /
+	uint8_t s1t[]={7,12,17,22}; // 1,-1   1,4   2,-1   3,-2
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0);
+		}
+	}
+	/ * round 2 * /
+	uint8_t s2t[]={5,9,14,20}; // 1,-3   1,1   2,-2   2,4
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
+		}
+	}
+	/ * round 3 * /
+	uint8_t s3t[]={4,11,16,23}; // 0,4   1,3   2,0   3,-1
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
+		}
+	}
+	/ * round 4 * /
+	uint8_t s4t[]={6,10,15,21}; // 1,-2   1,2   2,-1   3,-3
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
+		}
+	}
+	state->a[0] += a[0];
+	state->a[1] += a[1];
+	state->a[2] += a[2];
+	state->a[3] += a[3];
+	state->counter++;
+}
+*/
+
+shift_table_1:  .byte  7,12,17,22
+shift_table_2:  .byte  5, 9,14,20
+shift_table_3:  .byte  4,11,16,23
+shift_table_4:  .byte  6,10,15,21
+
+index_table_r2:
+;(1+m*4+n*5)&0xf:
+        .byte 0x04, 0x18, 0x2c, 0x00 
+        .byte 0x14, 0x28, 0x3c, 0x10 
+        .byte 0x24, 0x38, 0x0c, 0x20 
+        .byte 0x34, 0x08, 0x1c, 0x30 
+
+index_table_r3:
+;(5-m*4+n*3)&0xf:
+        .byte 0x14, 0x20, 0x2c, 0x38 
+        .byte 0x04, 0x10, 0x1c, 0x28 
+        .byte 0x34, 0x00, 0x0c, 0x18 
+        .byte 0x24, 0x30, 0x3c, 0x08 
+
+index_table_r4:
+;(0-m*4+n*7)&0xf:
+        .byte 0x00, 0x1c, 0x38, 0x14 
+        .byte 0x30, 0x0c, 0x28, 0x04 
+        .byte 0x20, 0x3c, 0x18, 0x34 
+        .byte 0x10, 0x2c, 0x08, 0x24
+
+APTR_REG = 2
+BPTR_REG = 4
+N_REG = 6
+M_REG = 7
+I_REG = 8
+.global md5_nextBlock
+md5_nextBlock:      /* in: r24:r25 = state, r22:r23 = block */
+	stack_alloc 16
+	push_range 2, 17
+	push r28
+	push r29
+	push r24        /* state pointer; popped into X (r26:r27) after round 4 */
+	push r25
+	adiw r30, 1 /* Z now points to the beginning of the allocated memory */
+	movw r2, r30    /* APTR_REG = working copy of a[] */
+	movw r4, r22    /* BPTR_REG = block */
+	movw r26, r24
+	ldi r20, 16
+1:
+	ld r0, X+
+	st Z+, r0
+	dec r20
+	brne 1b
+	/* state now copied to stack memory */
+	clr I_REG	
+	/* Round 1 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4      /* r16 counts 4..1 and is also passed as the as argument (4-n) */
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	mov r0, M_REG   /* r0 = (m*4+n)*4 = byte offset of the block word */
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	lsl r0
+	lsl r0
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_1)
+	ldi r31, hi8(shift_table_1)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z      /* s = shift_table_1[n] */
+	mov r19, I_REG
+	ldi r18, 0      /* fi = 0 */
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+	
+	/* Round 2 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r2)
+	ldi r31, hi8(index_table_r2)
+	mov r0, M_REG   /* r0 = m*4+n = position in the index table */
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0     /* the table byte is added to the block pointer as a byte offset */
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_2)
+	ldi r31, hi8(shift_table_2)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 1      /* fi = 1 */
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 3 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r3)
+	ldi r31, hi8(index_table_r3)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_3)
+	ldi r31, hi8(shift_table_3)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 2      /* fi = 2 */
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+	/* Round 4 */
+	clr M_REG
+	ldi r17, 4
+1:
+	clr N_REG	
+	ldi r16, 4
+2:
+	movw r24, APTR_REG
+	movw r22, BPTR_REG 
+	ldi r30, lo8(index_table_r4)
+	ldi r31, hi8(index_table_r4)
+	mov r0, M_REG
+	lsl r0
+	lsl r0
+	add r0, N_REG
+	add r30, r0
+	adc r31, r1
+	lpm r0, Z	
+	add r22, r0
+	adc r23, r1
+	mov r21, r16	
+	ldi r30, lo8(shift_table_4)
+	ldi r31, hi8(shift_table_4)
+	add r30, N_REG
+	adc r31, r1
+	lpm r20, Z
+	mov r19, I_REG
+	ldi r18, 3      /* fi = 3 */
+	rcall md5_core_asm
+	inc I_REG
+	inc N_REG
+	dec r16
+	brne 2b
+	inc M_REG
+	dec r17
+	brne 1b
+
+
+	pop r27
+	pop r26 /* X now points to the context */
+	movw r30, APTR_REG
+	ldi r16, 4
+1:
+	ld r0, X        /* 32-bit add: state word += working word (r2 is only a scratch byte here) */
+	ld r2, Z+
+	add r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	ld r0, X
+	ld r2, Z+
+	adc r0, r2
+	st X+, r0	
+	dec r16
+	brne 1b
+
+	ld r0, X        /* increment the 32-bit counter that follows a[]; stop once a byte does not wrap to zero */
+	inc r0
+	st X+, r0	
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0	
+	brne 2f
+	ld r0, X
+	inc r0
+	st X+, r0	
+	brne 2f	
+	ld r0, X
+	inc r0
+	st X+, r0	
+2:			
+
+	pop r29
+	pop r28
+	pop_range 2, 17
+	stack_free 16
+	ret
+
+;###############################################################################
+/*
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+	uint16_t l;
+	uint8_t b[64];
+	while (length_b >= 512){
+		md5_nextBlock(state, block);
+		length_b -= 512;
+		block = ((uint8_t*)block) + 512/8;
+	}
+	memset(b, 0, 64);
+	memcpy(b, block, length_b/8);
+	/ * insert padding one * /
+	l=length_b/8;
+	if(length_b%8){
+		uint8_t t;
+		t = ((uint8_t*)block)[l];
+		t |= (0x80>>(length_b%8));
+		b[l]=t;
+	}else{
+		b[l]=0x80;
+	}
+	/ * insert length value * /
+	if(l+sizeof(uint64_t) >= 512/8){
+		md5_nextBlock(state, b);
+		state->counter--;
+		memset(b, 0, 64-8);
+	}
+	*((uint64_t*)&b[64-sizeof(uint64_t)]) = (state->counter * 512) + length_b;
+	md5_nextBlock(state, b);
+}
+*/
+; state_ptr : r24,r25
+; block_ptr : r22,r23
+; length_b  : r20,r21
+.global md5_lastBlock
+md5_lastBlock:      /* in: r24:r25 = state, r22:r23 = block, r20:r21 = length_b in bits; pads, appends the length and hashes */
+	stack_alloc_large 64
+	push_range 12, 17
+	push r30        /* keep Z (set up by stack_alloc_large) across the md5_nextBlock calls below */
+	push r31
+	movw r16, r20 /* length_b  */ 
+	movw r14, r22 /* block_ptr */
+	movw r12, r24 /* state_ptr */
+2:
+	cpi r17, 2 /* hi8(512) */	
+	brlo 2f
+1:
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock
+	ldi r18, 64 /* must be loaded after the call: md5_nextBlock overwrites r18 */
+	add r14, r18
+	adc r15, r1
+	subi r17, 2
+	rjmp 2b
+2:
+	pop r31
+	pop r30
+
+	adiw r30, 1 /* adjust Z to point to buffer */
+	movw r26, r14
+	movw r24, r16
+	adiw r24, 7
+
+	lsr r25
+	ror r24
+	lsr r25
+	ror r24
+	lsr r24 /* r24 now holds how many bytes are to copy */
+	ldi r18, 64
+	sub r18, r24 /* r18 will hold the amount of still unused bytes in buffer */
+	tst r24
+4:
+	breq 5f
+	ld r0, X+
+	st Z+, r0 
+	dec r24
+	rjmp 4b /* Z points to the byte after msg in buffer */
+5:	/* append 1-bit */
+	mov r20, r16
+	ldi r19, 0x80
+	andi r20, 0x07
+	brne pad_partial_byte
+	st Z+, r19
+	dec r18 /* 'allocate' another byte in buffer */
+	rjmp pad_done
+pad_partial_byte:   /* length is not a multiple of 8: put the 1-bit into the last copied byte (still in r0) */
+1:
+	lsr r19
+	dec r20
+	brne 1b
+	or r0, r19
+	st -Z, r0
+	adiw r30, 1
+pad_done:
+	clt	
+	cpi r18, 8
+	brmi 2f
+	set         /* store in t if the counter will also fit in this block (1 if fit)*/
+2:
+	tst r18
+	breq 2f
+1: /* fill remaining buffer with zeros */
+	st Z+, r1
+	dec r18
+	brne 1b
+2:
+	sbiw r30, 63
+	sbiw r30,  1
+	movw r14, r30 /* r14:r15 now points to buffer */	
+	brts load_counter
+	/* counter does not fit, finalize this block */
+	movw r24, r12
+	movw r22, r14
+	rcall md5_nextBlock /* NOTE(review): the T flag is tested again below; assumes md5_nextBlock leaves it unchanged - confirm */
+	movw r30, r14
+	ldi r20, 64-8
+3:
+	st Z+, r1
+	dec r20
+	brne 3b
+	
+load_counter:		
+	movw r26, r12 /* X points to state */
+	adiw r26, 16
+	ld r19, X+
+	ld r20, X+
+	ld r21, X+
+	ld r22, X+
+	brts post_counter_decrement	/* do not decrement because counter fits */
+counter_decrement:
+	subi r19, 1     /* undo the increment done by the extra md5_nextBlock call on the padding block */
+	sbci r20, 0
+	sbci r21, 0
+	sbci r22, 0
+post_counter_decrement:
+	clr r18         /* r18..r23 = counter * 512: one byte up (r18 = 0) plus one bit left */
+	clr r23
+	lsl r19
+	rol r20
+	rol r21
+	rol r22
+	rol r23
+	mov r18, r16 /* r16:r17 length_b */
+	add r19, r17
+	adc r20, r1
+	adc r21, r1
+	adc r22, r1
+	adc r23, r1
+	movw r30, r14
+	adiw r30, 64-8
+	st Z+, r18      /* message length in bits, low byte first, in the last 8 bytes of the buffer */
+	st Z+, r19
+	st Z+, r20
+	st Z+, r21
+	st Z+, r22
+	st Z+, r23
+	st Z+, r1
+	st Z, r1
+
+	sbiw r30, 63
+;	sbiw r30, 1
+	movw r24, r12
+	movw r22, r30
+	rcall md5_nextBlock
+md5_lastBlock_exit:	
+	pop_range 12, 17
+	stack_free_large 64
+	ret
+
+
+;###############################################################################
+
+
+.global md5_ctx2hash
+md5_ctx2hash:       /* in: r24:r25 = dest, r22:r23 = state; copies the first 16 bytes of state to dest */
+	movw r26, r24   /* X = dest */
+	movw r30, r22   /* Z = state */
+	ldi r22, 16
+1:
+	ld r0, Z+
+	st X+, r0
+	dec r22
+	brne 1b	
+	ret
+
+
+;###############################################################################
+
+
+.global md5
+md5:                /* in: r24:r25 = dest, r22:r23 = msg, r18..r21 = length in bits */
+	stack_alloc 20
+	push_range  8, 17
+	adiw r30, 1
+	movw  r8, r30 /* ctx           */
+	movw r10, r24 /* dest          */
+	movw r12, r22 /* msg           */
+	movw r14, r18 /* length (low)  */
+	movw r16, r20 /* length (high) */
+	movw r24, r30
+	rcall md5_init
+1:
+	tst r16         /* loop only while the upper 16 bits of the length are non-zero */
+	brne next_round
+	tst r17
+	breq last_round
+next_round:
+	movw r24,  r8
+	movw r22, r12
+	rcall md5_nextBlock
+	ldi r22, 64     /* msg += 64 bytes */
+	add r12, r22
+	adc r13, r1
+	ldi r22, 2      /* length -= 512 bits (2 subtracted from byte 1 of the length) */
+	sub r15, r22
+	sbci r16, 0
+	sbci r17, 0
+	rjmp 1b
+last_round:		
+	movw r24, r8
+	movw r22, r12
+	movw r20, r14   /* the remaining low 16 bits of the length are handed to md5_lastBlock */
+	rcall md5_lastBlock
+	movw r24, r10
+	movw r22,  r8
+	rcall md5_ctx2hash
+	pop_range  8, 17
+	stack_free 20
+	ret
+
+
+
diff --git a/md5/md5.c b/md5/md5.c
new file mode 100644
index 0000000..003699e
--- /dev/null
+++ b/md5/md5.c
@@ -0,0 +1,185 @@
+/* md5.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * \file	md5.c
+ * \author	Daniel Otte
+ * \date 	2006-07-31
+ * \license GPLv3 or later
+ * \brief   Implementation of the MD5 hash algorithm as described in RFC 1321
+ * 
+ */
+
+ #include "md5.h"
+ #include "md5_sbox.h"
+ #include "cli.h" 
+ #include <stdint.h>
+ #include <string.h>
+ 
+ #undef DEBUG
+ 
+void md5_init(md5_ctx_t *s){ /* resets the context: zero block counter, fixed start values in a[] */
+	s->counter = 0;
+	s->a[0] = 0x67452301;
+	s->a[1] = 0xefcdab89;
+	s->a[2] = 0x98badcfe;
+	s->a[3] = 0x10325476;
+}
+
+static 
+uint32_t md5_F(uint32_t x, uint32_t y, uint32_t z){ /* per bit: y where x is 1, z where x is 0 */
+	return ((x&y)|((~x)&z));
+}
+
+static
+uint32_t md5_G(uint32_t x, uint32_t y, uint32_t z){ /* per bit: x where z is 1, y where z is 0 */
+	return ((x&z)|((~z)&y));
+}
+
+static
+uint32_t md5_H(uint32_t x, uint32_t y, uint32_t z){ /* bitwise xor of all three inputs */
+	return (x^y^z);
+}
+
+static
+uint32_t md5_I(uint32_t x, uint32_t y, uint32_t z){ /* y xor (x or not z), bitwise */
+	return (y ^ (x | (~z)));
+}
+
+typedef uint32_t md5_func_t(uint32_t, uint32_t, uint32_t);
+
+#define ROTL32(x,n) (((x)<<(n)) | ((x)>>(32-(n))))  
+
+static
+void md5_core(uint32_t* a, void* block, uint8_t as, uint8_t s, uint8_t i, uint8_t fi){ /* one MD5 step on a[as&3] */
+	uint32_t t;
+	md5_func_t* funcs[]={md5_F, md5_G, md5_H, md5_I}; /* selected by fi (0..3) */
+	as &= 0x3;
+	/* a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). block points at X[k], s is the rotate count */
+#ifdef DEBUG
+	char funcc[]={'*', '-', '+', '~'};
+	cli_putstr("\r\n DBG: md5_core [");
+	cli_putc(funcc[fi]);
+	cli_hexdump(&as, 1); cli_putc(' ');
+	cli_hexdump(&fi, 1); cli_putc(' '); /* was &k, which is not declared in this function */
+	cli_hexdump(&s, 1); cli_putc(' ');
+	cli_hexdump(&i, 1); cli_putc(']');
+#endif	
+	t = a[as] + funcs[fi](a[(as+1)&3], a[(as+2)&3], a[(as+3)&3]) 
+	    + *((uint32_t*)block) + pgm_read_dword(md5_T+i) ;
+	a[as]=a[(as+1)&3] + ROTL32(t, s);
+}
+
+void md5_nextBlock(md5_ctx_t *state, const void* block){ /* runs the 64 steps on one block and adds the result into state */
+	uint32_t	a[4];
+	uint8_t		m,n,i=0; /* i counts the steps 0..63 and indexes md5_T inside md5_core */
+	/* this requires other mixed sboxes */
+#ifdef DEBUG
+	cli_putstr("\r\n DBG: md5_nextBlock: block:\r\n");
+	cli_hexdump(block, 16);	cli_putstr("\r\n");
+	cli_hexdump(block+16, 16);	cli_putstr("\r\n");
+	cli_hexdump(block+32, 16);	cli_putstr("\r\n");
+	cli_hexdump(block+48, 16);	cli_putstr("\r\n");
+#endif	
+	
+	a[0]=state->a[0];
+	a[1]=state->a[1];
+	a[2]=state->a[2];
+	a[3]=state->a[3];
+	
+	/* round 1 */
+	uint8_t s1t[]={7,12,17,22}; // 1,-1   1,4   2,-1   3,-2
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[m*4+n]), 4-n, s1t[n],i++,0); /* words are used in order 0..15 */
+		}
+	}
+	/* round 2 */
+	uint8_t s2t[]={5,9,14,20}; // 1,-3   1,1   2,-2   2,4
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(1+m*4+n*5)&0xf]), 4-n, s2t[n],i++,1);
+		}
+	}
+	/* round 3 */
+	uint8_t s3t[]={4,11,16,23}; // 0,4   1,3   2,0   3,-1
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(5-m*4+n*3)&0xf]), 4-n, s3t[n],i++,2);
+		}
+	}
+	/* round 4 */
+	uint8_t s4t[]={6,10,15,21}; // 1,-2   1,2   2,-1   3,-3
+	for(m=0;m<4;++m){
+		for(n=0;n<4;++n){
+			md5_core(a, &(((uint32_t*)block)[(0-m*4+n*7)&0xf]), 4-n, s4t[n],i++,3);
+		}
+	}
+	state->a[0] += a[0];
+	state->a[1] += a[1];
+	state->a[2] += a[2];
+	state->a[3] += a[3];
+	state->counter++; /* one more block processed; md5_lastBlock derives the message length from this */
+}
+
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length_b){
+	/* hashes the last length_b bits at block, then the padding and the 64-bit message length */
+	uint8_t b[64];
+	while (length_b >= 512){
+		md5_nextBlock(state, block);
+		length_b -= 512;
+		block = ((uint8_t*)block) + 512/8;
+	}
+	memset(b, 0, 64);
+	memcpy(b, block, length_b/8);
+	/* insert padding one */
+	uint16_t l=length_b/8;
+	if(length_b%8){
+		/* partial byte: the padding bit goes right behind the last message bit */
+		uint8_t t = ((uint8_t*)block)[l];
+		t |= (0x80>>(length_b%8));
+		b[l]=t;
+	}else{
+		b[l]=0x80;
+	}
+	/* insert length value (counter is widened to 64 bit first so the product cannot wrap at 2^32 bits) */
+	if(l+sizeof(uint64_t) >= 512/8){
+		md5_nextBlock(state, b);
+		state->counter--;
+		memset(b, 0, 64-8);
+	}
+	*((uint64_t*)&b[64-sizeof(uint64_t)]) = ((uint64_t)state->counter * 512) + length_b;
+	md5_nextBlock(state, b);
+}
+
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state){ /* the digest is the raw content of state->a */
+	memcpy(dest, state->a, MD5_HASH_BYTES);
+}
+
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b){ /* one-shot hash of length_b bits at msg */
+	md5_ctx_t ctx;
+	md5_init(&ctx);
+	while(length_b>=MD5_BLOCK_BITS){
+		md5_nextBlock(&ctx, msg);
+		msg = (uint8_t*)msg + MD5_BLOCK_BYTES;
+		length_b -= MD5_BLOCK_BITS;
+	}
+	md5_lastBlock(&ctx, msg, length_b); /* fewer than MD5_BLOCK_BITS bits remain here */
+	md5_ctx2hash(dest, &ctx);
+}
+
diff --git a/md5/md5.h b/md5/md5.h
new file mode 100644
index 0000000..6b65c4a
--- /dev/null
+++ b/md5/md5.h
@@ -0,0 +1,55 @@
+/* md5.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * File:	md5.h
+ * Author:	Daniel Otte
+ * Date: 	31.07.2006
+ * License: GPLv3 or later
+ * Description: Implementation of the MD5 hash algorithm as described in RFC 1321
+ * 
+ */
+
+
+#ifndef MD5_H_
+#define MD5_H_
+
+#include <stdint.h>
+
+
+#define MD5_HASH_BITS  128
+#define MD5_HASH_BYTES (MD5_HASH_BITS/8)
+#define MD5_BLOCK_BITS 512
+#define MD5_BLOCK_BYTES (MD5_BLOCK_BITS/8)
+
+
+typedef struct md5_ctx_st {
+	uint32_t a[4];    /* chaining state; md5_ctx2hash() copies it out as the digest */
+	uint32_t counter; /* incremented once per md5_nextBlock() call */
+} md5_ctx_t;
+
+typedef uint8_t md5_hash_t[MD5_HASH_BYTES];
+
+ 
+void md5_init(md5_ctx_t *s); /* sets the start values and clears the block counter */
+void md5_nextBlock(md5_ctx_t *state, const void* block); /* block: MD5_BLOCK_BYTES bytes */
+void md5_lastBlock(md5_ctx_t *state, const void* block, uint16_t length); /* length is given in bits */
+void md5_ctx2hash(md5_hash_t* dest, const md5_ctx_t* state); /* copies MD5_HASH_BYTES bytes to dest */
+void md5(md5_hash_t* dest, const void* msg, uint32_t length_b); /* one-shot hash; length_b is given in bits */
+
+#endif /*MD5_H_*/
diff --git a/md5/md5_sbox.h b/md5/md5_sbox.h
new file mode 100644
index 0000000..597b3db
--- /dev/null
+++ b/md5/md5_sbox.h
@@ -0,0 +1,40 @@
+/* md5_sbox.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef MD5_SBOX_H_
+#define MD5_SBOX_H_
+
+#include <stdint.h>
+#include <avr/pgmspace.h>
+
+const uint32_t md5_T[] PROGMEM = { /* additive step constants T[1..64] of RFC 1321; const is required for PROGMEM data by current avr-gcc */
+	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 
+	0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 
+	0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 
+	0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 
+	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 
+	0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 
+	0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 
+	0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 
+	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 
+	0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 
+	0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 
+	0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 
+	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
+
+#endif /*MD5_SBOX_H_*/
diff --git a/md5_sbox.h b/md5_sbox.h
deleted file mode 100644
index 597b3db..0000000
--- a/md5_sbox.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* md5_sbox.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef MD5_SBOX_H_
-#define MD5_SBOX_H_
-
-#include <stdint.h>
-#include <avr/pgmspace.h>
-
-uint32_t md5_T[] PROGMEM = {
-	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 
-	0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 
-	0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 
-	0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 
-	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 
-	0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 
-	0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 
-	0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 
-	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 
-	0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97, 
-	0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 
-	0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 
-	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
-
-#endif /*MD5_SBOX_H_*/
diff --git a/mickey128.c b/mickey128.c
deleted file mode 100644
index d33571e..0000000
--- a/mickey128.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/**
- * 
- * author: Daniel Otte
- * email:  daniel.otte@rub.de
- * license: GPLv3
- * 
- * */
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "mickey128.h"
-#include "cli.h"
-/*
-RTAPS = { 0,4,5,  8,10,11,14  ,16,20,  25,30,  32,35,36,38,
-         42,43,46,  50,51,53,54,55,  56,57,60,61,62,63,  
-         65,66,69,  73,74,76,79,  80,81,82,85,86,  90,91,92,95,  
-         97,100,101,  105,106,107,108,109,111,  112,113,115,116,117,
-         127,  128,129,130,131,133,135,  136,137,140,142,  145,148,150,  
-         152,153,154,156,157 }
-         
-         0011.0001  0100.1101  0001.0001  0100.0010  0101.1001
-         0100.1100  1110.1100  1111.0011 
-         0010.0110  1001.0110  0110.0111  1001.1100 
-         0011.0010  1011.1110  0011.1011
-         1000.0000  1010.1111  0101.0011  0101.0010
-         0011.0111
-          
-         1000110010110010100010000100001010011010001100100011011111001111011001
-         0001101001111001100011100101001100011111011101110000000001111101011100
-         101001001010111011
-          
-         1000.1100 1011.0010 1000.1000 0100.0010 1001.1010 0011.0010 
-         0011.0111 1100.1111 0110.0100 0110.1001 1110.0110 0011.1001
-         0100.1100 0111.1101 1101.1100 0000.0001 1111.0101 1100.1010
-         0100.1010 1110.11
-*/
-
-uint8_t rtaps[] PROGMEM = {
-	0x31, 0x4D, 0x11, 0x42,
-	0x59, 0x4C, 0xEC, 0xF3,
-	0x26, 0x96, 0x67, 0x9C,
-	0x32, 0xBE, 0x3B, 0x80, 
-	0xAF, 0x53, 0x52, 0x37
-};
-	
-static 
-void memxor_P(void* d, PGM_VOID_P s, uint8_t length_B){
-	while(length_B--){
-		*((uint8_t*)d) ^= pgm_read_byte(s);
-		d = (uint8_t*)d +1;
-		s = (uint8_t*)s +1;
-	}
-}
-
-#define SHL0(a) c1=((a)>>7); (a)=(((a)<<1)|c0)
-#define SHL1(a) c0=((a)>>7); (a)=(((a)<<1)|c1)
-
-#define SHLX0(a) c1=((a)>>7); (a)^=(((a)<<1)|c0)
-#define SHLX1(a) c0=((a)>>7); (a)^=(((a)<<1)|c1)
-
-static
-void clock_r(uint8_t* r, uint8_t ibit, uint8_t cbit){
-	uint8_t i,c0=0,c1=0; /* carry */
-	ibit ^= ((r[159/8])>>(159%8))&1; /* ibit is now the same as feedback_bit */
-	if(cbit){
-		for(i=0; i<10; ++i){
-			SHLX0(r[2*i+0]);
-			SHLX1(r[2*i+1]);
-		}
-	} else {
-		for(i=0; i<10; ++i){
-			SHL0(r[2*i+0]);
-			SHL1(r[2*i+1]);
-		}
-	}
-	if(ibit){
-		memxor_P(r, rtaps, 20);
-	}
-}
-
-/* comp0 (filling spaces with zero) (one at each side)
- * 0101.1110 1111.0010 1101.0110 0101.1101 
- * 0101.0101 0000.1001 0010.0110 0111.1001
- * 0110.0010 0111.0000 0000.0000 0111.1001
- * 0011.0001 1101.1001 1010.1111 0011.0111
- * 1011.1110 0000.0110 1011.1110 0000.1111
- * --
- * 5E F2 D6 5D
- * 55 09 26 79
- * 62 70 00 79
- * 31 D9 AF 37
- * BE 06 BE 0F
- */ 
-uint8_t comp0[] PROGMEM = {
-	0x5E, 0xF2, 0xD6, 0x5D,
-	0x55, 0x09, 0x26, 0x79,
-	0x62, 0x70, 0x00, 0x79, 
-	0x31, 0xD9, 0xAF, 0x37, 
-	0xBE, 0x06, 0xBE, 0x0F
-};
-
-
-/* comp1 (shifting one bit right to make calculation easier, so inserting two zeros)
- * 0110.0000 0011.1110 0011.0010 1111.1010 
- * 0011.0000 0111.1001 0110.1100 1111.1101
- * 1100.0001 1000.0111 0000.0001 1111.1000
- * 1000.1010 1100.0110 1100.0001 1100.1100
- * 0110.1010 1011.0111 1110.1000 1111.1111
- * --
- * 60 3E 32 FA
- * 30 79 6C FD
- * C1 87 01 F8
- * 8A C6 C1 CC
- * 6A B7 E8 FF
-*/
-/*
-uint8_t comp1[] PROGMEM = {
-	0x60, 0x3E, 0x32, 0xFA, 0x30, 0x79, 0x6C, 0xFD, 0xC1, 0x87, 
-	0x01, 0xF8, 0x8A, 0xC6, 0xC1, 0xCC, 0x6A, 0xB7, 0xE8, 0xFF
-};
-*/
-/* comp1
- * 0000.1100 1111.1000 1001.1000 1011.1110
- * 0001.1001 0011.1100 0110.1101 0111.1111
- * 0000.0111 1100.0011 0000.0000 0011.1110
- * 1010.0010 1100.0111 0000.0110 0110.0110
- * 1010.1101 1101.1010 0010.1111 1111.1110
- * --
- * 0C F8 98 BE
- * 19 3C 6D 7F
- * 07 C3 00 3E
- * A2 C7 06 66
- * AD DA 2F FE
-*/
-/*
-uint8_t comp1[] PROGMEM = {
-	0x0C, 0xF8, 0x98, 0xBE, 0x19, 0x3C, 0x6D, 0x7F, 0x07, 0xC3,
-	0x00, 0x3E, 0xA2, 0xC7, 0x06, 0x66, 0xAD, 0xDA, 0x2F, 0xFE
-};
-*/
-/* comp1
- * 0011.0000 0001.1111 0001.1001 0111.1101
- * 1001.1000 0011.1100 1011.0110 1111.1110
- * 1110.0000 1100.0011 0000.0000 0111.1100
- * 0100.0101 1110.0011 0110.0000 0110.0110
- * 1011.0101 0101.1011 1111.0100 0111.1111
- * --
- * 30 1F 19 7D
- * 98 3C B6 FE
- * E0 C3 00 7C
- * 45 E3 60 66
- * B5 5B F4 7F
-*/
-
-uint8_t comp1[] PROGMEM = {
-	0x30, 0x1F, 0x19, 0x7D,
-	0x98, 0x3C, 0xB6, 0xFE, 
-	0xE0, 0xC3, 0x00, 0x7C, 
-	0x45, 0xE3, 0x60, 0x66, 
-	0xB5, 0x5B, 0xF4, 0x7F
-};
-
-/* fb0
- * 1010.1111 0001.1111 0011.1100 1100.0100
- * 0010.0010 1010.0011 0010.1111 0000.1110
- * 1000.0001 0100.1101 1110.0101 0110.0110
- * 1001.0001 0100.1011 0101.0100 1101.0100
- * 1100.0001 0000.1011 0110.0011 1000.0011
- * --
- * AF 1F 3C C4
- * 22 A3 2F 0E
- * 81 4D E5 66
- * 91 4B 54 D4
- * C1 0B 63 83
- */ 
-uint8_t fb0[] PROGMEM = {	
-	0xAF, 0x1F, 0x3C, 0xC4, 
-	0x22, 0xA3, 0x2F, 0x0E, 
-	0x81, 0x4D, 0xE5, 0x66, 
-	0x91, 0x4B, 0x54, 0xD4, 
-	0xC1, 0x0B, 0x63, 0x83
-};
-
-/* fb1
- * 1010.1011 0111.0111 1111.0100 1001.1011 
- * 1001.0000 1000.1100 0111.1001 0111.0000
- * 1011.0110 0001.1000 1001.1010 0110.1111
- * 1110.0111 0111.1110 0100.1011 0110.1100 
- * 1110.1111 1000.0000 1010.0111 0001.0001
- * --
- * AB 77 F4 9B
- * 90 8C 79 70
- * B6 18 9A 6F
- * E7 7E 4B 6C
- * EF 80 A7 11
- */ 
-uint8_t fb1[] PROGMEM = {
-	0xAB, 0x77, 0xF4, 0x9B, 
-	0x90, 0x8C, 0x79, 0x70, 
-	0xB6, 0x18, 0x9A, 0x6F, 
-	0xE7, 0x7E, 0x4B, 0x6C, 
-	0xEF, 0x80, 0xA7, 0x11
-};
-
-static
-void clock_s(uint8_t* s, uint8_t ibit, uint8_t cbit){
-	uint8_t s0[20], s1[20];
-	uint8_t i,c=0, c2=0;
-	ibit ^= (s[19])>>7;
-	memcpy(s0,s,20);
-	memxor_P(s0, comp0, 20);
-	for(i=0; i<20; ++i){
-		s1[19-i]= c|((s[19-i])>>1);
-		c = (s[19-i])<<7;
-	}
-	memxor_P(s1, comp1, 20);
-	c=0;
-	for(i=0; i<20; ++i){
-		c2=(s[i])>>7;
-		s[i]=((s[i])<<1) ^ ((s0[i])&(s1[i])) ^ c;
-		c=c2;
-	}
-	s[0] &= 0xFE;
-	if(ibit){
-		memxor_P(s, cbit?fb1:fb0, 20);
-	}
-}
-
-static
-void clock_kg(uint8_t* r, uint8_t* s, uint8_t mixing, uint8_t input){
-	uint8_t rb, sb;
-	rb = ((s[ 54/8])>>(( 54%8))) ^ ((r[106/8])>>(((106%8))));
-	sb = ((s[106/8])>>((106%8))) ^ ((r[ 53/8])>>((( 53%8))));
-	rb &= 1;
-	sb &= 1;
-	mixing = input ^ (mixing & ((s[80/8]>>((80%8))) & 1)); 
-	clock_r(r, mixing, rb);
-	clock_s(s, input, sb);
-}
-
-void mickey128_init(void* key, uint16_t keysize_b, 
-                    void* iv,  uint16_t ivsize_b, 
-                    mickey128_ctx_t* ctx){
-	uint16_t i;
-	memset(ctx->r, 0, 20);
-	memset(ctx->s, 0, 20);
-	for(i=0; i<ivsize_b; ++i){
-		clock_kg(ctx->r, ctx->s, 1, 1&((((uint8_t*)iv)[i/8])>>(7-(i%8))));
-	}
-	for(i=0; i<keysize_b; ++i){
-		clock_kg(ctx->r, ctx->s, 1, 1&((((uint8_t*)key)[i/8])>>(7-(i%8))));
-	}                  
-	for(i=0; i<160; ++i){
-		clock_kg(ctx->r, ctx->s, 1, 0);
-	}                  
-} 
-
-uint8_t mickey128_getbit(mickey128_ctx_t* ctx){
-	uint8_t ret;
-	ret = 1&(*(ctx->r) ^ *(ctx->s));
-	clock_kg(ctx->r, ctx->s, 0, 0);
-	return ret;
-}
-
-uint8_t mickey128_getbyte(mickey128_ctx_t* ctx){
-	uint8_t i,ret=0;
-	for(i=0; i<8; ++i){
-		ret<<=1;
-		ret |= 1&(((ctx->r)[0]) ^ ((ctx->s)[0]));
-		clock_kg(ctx->r, ctx->s, 0, 0);
-	}
-	return ret;
-}
-
-		
diff --git a/mickey128.h b/mickey128.h
deleted file mode 100644
index 9a125f4..0000000
--- a/mickey128.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef MICKEY128_H_
-#define MICKEY128_H_
-
-#include <stdint.h>
-
-typedef struct mickey128_ctx_st{
-	uint8_t r[20];
-	uint8_t s[20];
-} mickey128_ctx_t;
-
-void debug_clock_r(void);
-void debug_clock_s(void);
-void mickey128_init(void* key, uint16_t keysize_b, 
-                    void* iv,  uint16_t ivsize_b, 
-                    mickey128_ctx_t* ctx);
-uint8_t mickey128_getbit(mickey128_ctx_t* ctx);
-uint8_t mickey128_getbyte(mickey128_ctx_t* ctx);
-
-#endif /*MICKEY128_H_*/
diff --git a/mickey128/mickey128.c b/mickey128/mickey128.c
new file mode 100644
index 0000000..d33571e
--- /dev/null
+++ b/mickey128/mickey128.c
@@ -0,0 +1,276 @@
+/**
+ * 
+ * author: Daniel Otte
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ * 
+ * */
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "mickey128.h"
+#include "cli.h"
+/*
+RTAPS = { 0,4,5,  8,10,11,14  ,16,20,  25,30,  32,35,36,38,
+         42,43,46,  50,51,53,54,55,  56,57,60,61,62,63,  
+         65,66,69,  73,74,76,79,  80,81,82,85,86,  90,91,92,95,  
+         97,100,101,  105,106,107,108,109,111,  112,113,115,116,117,
+         127,  128,129,130,131,133,135,  136,137,140,142,  145,148,150,  
+         152,153,154,156,157 }
+         
+         0011.0001  0100.1101  0001.0001  0100.0010  0101.1001
+         0100.1100  1110.1100  1111.0011 
+         0010.0110  1001.0110  0110.0111  1001.1100 
+         0011.0010  1011.1110  0011.1011
+         1000.0000  1010.1111  0101.0011  0101.0010
+         0011.0111
+          
+         1000110010110010100010000100001010011010001100100011011111001111011001
+         0001101001111001100011100101001100011111011101110000000001111101011100
+         101001001010111011
+          
+         1000.1100 1011.0010 1000.1000 0100.0010 1001.1010 0011.0010 
+         0011.0111 1100.1111 0110.0100 0110.1001 1110.0110 0011.1001
+         0100.1100 0111.1101 1101.1100 0000.0001 1111.0101 1100.1010
+         0100.1010 1110.11
+*/
+
+const uint8_t rtaps[] PROGMEM = { /* 20-byte mask XORed into r by clock_r; const as required for PROGMEM data */
+	0x31, 0x4D, 0x11, 0x42,
+	0x59, 0x4C, 0xEC, 0xF3,
+	0x26, 0x96, 0x67, 0x9C,
+	0x32, 0xBE, 0x3B, 0x80, 
+	0xAF, 0x53, 0x52, 0x37
+};
+	
+/* XOR length_B bytes, fetched from s with pgm_read_byte, into the bytes at d. */
+static void memxor_P(void* d, PGM_VOID_P s, uint8_t length_B){
+	uint8_t *dst = d;
+	const uint8_t *src = s;
+	for(; length_B; --length_B, ++dst, ++src){
+		*dst ^= pgm_read_byte(src);
+	}
+}
+
+/* Shift byte (a) left by one bit: SHL0 takes its carry-in from c0 and leaves the carry-out in c1, SHL1 the reverse. */
+#define SHL0(a) do{ c1=((a)>>7); (a)=(((a)<<1)|c0); }while(0)
+#define SHL1(a) do{ c0=((a)>>7); (a)=(((a)<<1)|c1); }while(0)
+#define SHLX0(a) do{ c1=((a)>>7); (a)^=(((a)<<1)|c0); }while(0)  /* as SHL0, but XORs the shifted value into (a) */
+#define SHLX1(a) do{ c0=((a)>>7); (a)^=(((a)<<1)|c1); }while(0)  /* as SHL1, but XORs the shifted value into (a) */
+
+/* Clock the 20-byte register r once. The register is shifted left by one bit
+ * across all bytes (bit 7 of each byte carries into bit 0 of the next); when
+ * cbit is set the shifted value is XORed into r, otherwise it replaces r. If
+ * ibit XOR (bit 7 of r[19] before the shift) is non-zero, rtaps is XORed in. */
+static void clock_r(uint8_t* r, uint8_t ibit, uint8_t cbit){
+	uint8_t idx, old, shifted, carry=0;
+	ibit ^= ((r[159/8])>>(159%8))&1; /* fold the feedback bit into ibit */
+	/* single ripple carry, lowest byte first */
+	for(idx=0; idx<20; ++idx){
+		old = r[idx];
+		shifted = (old<<1)|carry;
+		carry = old>>7;
+		/* cbit selects XOR-into-place over plain replacement */
+		r[idx] = cbit ? (old^shifted) : shifted;
+	}
+	if(ibit){
+		memxor_P(r, rtaps, 20);
+	}
+}
+
+/* comp0 (filling spaces with zero) (one at each side)
+ * 0101.1110 1111.0010 1101.0110 0101.1101 
+ * 0101.0101 0000.1001 0010.0110 0111.1001
+ * 0110.0010 0111.0000 0000.0000 0111.1001
+ * 0011.0001 1101.1001 1010.1111 0011.0111
+ * 1011.1110 0000.0110 1011.1110 0000.1111
+ * --
+ * 5E F2 D6 5D
+ * 55 09 26 79
+ * 62 70 00 79
+ * 31 D9 AF 37
+ * BE 06 BE 0F
+ */ 
+const uint8_t comp0[] PROGMEM = { /* 20-byte mask XORed into a copy of s by clock_s; const as required for PROGMEM data */
+	0x5E, 0xF2, 0xD6, 0x5D,
+	0x55, 0x09, 0x26, 0x79,
+	0x62, 0x70, 0x00, 0x79, 
+	0x31, 0xD9, 0xAF, 0x37, 
+	0xBE, 0x06, 0xBE, 0x0F
+};
+
+
+/* comp1 (shifting one bit right to make calculation easier, so inserting two zeros)
+ * 0110.0000 0011.1110 0011.0010 1111.1010 
+ * 0011.0000 0111.1001 0110.1100 1111.1101
+ * 1100.0001 1000.0111 0000.0001 1111.1000
+ * 1000.1010 1100.0110 1100.0001 1100.1100
+ * 0110.1010 1011.0111 1110.1000 1111.1111
+ * --
+ * 60 3E 32 FA
+ * 30 79 6C FD
+ * C1 87 01 F8
+ * 8A C6 C1 CC
+ * 6A B7 E8 FF
+*/
+/*
+uint8_t comp1[] PROGMEM = {
+	0x60, 0x3E, 0x32, 0xFA, 0x30, 0x79, 0x6C, 0xFD, 0xC1, 0x87, 
+	0x01, 0xF8, 0x8A, 0xC6, 0xC1, 0xCC, 0x6A, 0xB7, 0xE8, 0xFF
+};
+*/
+/* comp1
+ * 0000.1100 1111.1000 1001.1000 1011.1110
+ * 0001.1001 0011.1100 0110.1101 0111.1111
+ * 0000.0111 1100.0011 0000.0000 0011.1110
+ * 1010.0010 1100.0111 0000.0110 0110.0110
+ * 1010.1101 1101.1010 0010.1111 1111.1110
+ * --
+ * 0C F8 98 BE
+ * 19 3C 6D 7F
+ * 07 C3 00 3E
+ * A2 C7 06 66
+ * AD DA 2F FE
+*/
+/*
+uint8_t comp1[] PROGMEM = {
+	0x0C, 0xF8, 0x98, 0xBE, 0x19, 0x3C, 0x6D, 0x7F, 0x07, 0xC3,
+	0x00, 0x3E, 0xA2, 0xC7, 0x06, 0x66, 0xAD, 0xDA, 0x2F, 0xFE
+};
+*/
+/* comp1
+ * 0011.0000 0001.1111 0001.1001 0111.1101
+ * 1001.1000 0011.1100 1011.0110 1111.1110
+ * 1110.0000 1100.0011 0000.0000 0111.1100
+ * 0100.0101 1110.0011 0110.0000 0110.0110
+ * 1011.0101 0101.1011 1111.0100 0111.1111
+ * --
+ * 30 1F 19 7D
+ * 98 3C B6 FE
+ * E0 C3 00 7C
+ * 45 E3 60 66
+ * B5 5B F4 7F
+*/
+
+const uint8_t comp1[] PROGMEM = { /* 20-byte mask XORed into the right-shifted copy of s by clock_s; const as required for PROGMEM data */
+	0x30, 0x1F, 0x19, 0x7D,
+	0x98, 0x3C, 0xB6, 0xFE, 
+	0xE0, 0xC3, 0x00, 0x7C, 
+	0x45, 0xE3, 0x60, 0x66, 
+	0xB5, 0x5B, 0xF4, 0x7F
+};
+
+/* fb0
+ * 1010.1111 0001.1111 0011.1100 1100.0100
+ * 0010.0010 1010.0011 0010.1111 0000.1110
+ * 1000.0001 0100.1101 1110.0101 0110.0110
+ * 1001.0001 0100.1011 0101.0100 1101.0100
+ * 1100.0001 0000.1011 0110.0011 1000.0011
+ * --
+ * AF 1F 3C C4
+ * 22 A3 2F 0E
+ * 81 4D E5 66
+ * 91 4B 54 D4
+ * C1 0B 63 83
+ */ 
+const uint8_t fb0[] PROGMEM = { /* 20-byte feedback mask clock_s XORs into s when cbit is zero; const as required for PROGMEM data */
+	0xAF, 0x1F, 0x3C, 0xC4, 
+	0x22, 0xA3, 0x2F, 0x0E, 
+	0x81, 0x4D, 0xE5, 0x66, 
+	0x91, 0x4B, 0x54, 0xD4, 
+	0xC1, 0x0B, 0x63, 0x83
+};
+
+/* fb1
+ * 1010.1011 0111.0111 1111.0100 1001.1011 
+ * 1001.0000 1000.1100 0111.1001 0111.0000
+ * 1011.0110 0001.1000 1001.1010 0110.1111
+ * 1110.0111 0111.1110 0100.1011 0110.1100 
+ * 1110.1111 1000.0000 1010.0111 0001.0001
+ * --
+ * AB 77 F4 9B
+ * 90 8C 79 70
+ * B6 18 9A 6F
+ * E7 7E 4B 6C
+ * EF 80 A7 11
+ */ 
+const uint8_t fb1[] PROGMEM = { /* 20-byte feedback mask clock_s XORs into s when cbit is non-zero; const as required for PROGMEM data */
+	0xAB, 0x77, 0xF4, 0x9B, 
+	0x90, 0x8C, 0x79, 0x70, 
+	0xB6, 0x18, 0x9A, 0x6F, 
+	0xE7, 0x7E, 0x4B, 0x6C, 
+	0xEF, 0x80, 0xA7, 0x11
+};
+
+/* Clock the 20-byte register s once. ibit is XORed with bit 7 of s[19] first; if the
+ * result is non-zero, fb1 (cbit set) or fb0 (cbit clear) is XORed in at the end. */
+static void clock_s(uint8_t* s, uint8_t ibit, uint8_t cbit){
+	uint8_t masked[20], shr[20];
+	uint8_t k, top, carry=0;
+	ibit ^= (s[19])>>7;
+	memcpy(masked, s, 20);             /* masked = s ^ comp0 */
+	memxor_P(masked, comp0, 20);
+	for(k=20; k-- > 0; ){              /* shr = s shifted right by one bit, top byte first */
+		shr[k] = carry|((s[k])>>1);
+		carry = (s[k])<<7;
+	}
+	memxor_P(shr, comp1, 20);          /* shr ^= comp1 */
+	for(k=0, carry=0; k<20; ++k){      /* s = (s shifted left by one bit) ^ (masked & shr) */
+		top = (s[k])>>7;
+		s[k] = ((s[k])<<1) ^ (masked[k]&shr[k]) ^ carry;
+		carry = top;
+	}
+	s[0] &= 0xFE;                      /* bit 0 of s[0] is forced to zero */
+	if(ibit){
+		memxor_P(s, cbit?fb1:fb0, 20);
+	}
+}
+
+/* One generator step: derive both control bits and the input bit for r from
+ * the current state, then clock r and s. */
+static void clock_kg(uint8_t* r, uint8_t* s, uint8_t mixing, uint8_t input){
+	/* all state bits are sampled before either register is clocked */
+	const uint8_t ctrl_r = 1 & (((s[ 54/8])>>( 54%8)) ^ ((r[106/8])>>(106%8)));
+	const uint8_t ctrl_s = 1 & (((s[106/8])>>(106%8)) ^ ((r[ 53/8])>>( 53%8)));
+	const uint8_t s80    = 1 & ((s[80/8])>>(80%8));
+	const uint8_t in_r   = input ^ (mixing & s80); /* s bit 80 joins in only when bit 0 of mixing is set */
+	clock_r(r, in_r, ctrl_r);
+	clock_s(s, input, ctrl_s);
+}
+
+/* Initialise ctx from ivsize_b bits of iv and keysize_b bits of key (each byte is consumed most-significant bit first). */
+void mickey128_init(void* key, uint16_t keysize_b, void* iv, uint16_t ivsize_b, mickey128_ctx_t* ctx){
+	const uint8_t *key_bytes = key, *iv_bytes = iv;
+	uint16_t n;
+	memset(ctx->r, 0, 20);
+	memset(ctx->s, 0, 20);
+	for(n=0; n<ivsize_b; ++n){   /* IV bits first, mixing on */
+		clock_kg(ctx->r, ctx->s, 1, 1&(iv_bytes[n/8]>>(7-(n%8))));
+	}
+	for(n=0; n<keysize_b; ++n){  /* then the key bits, mixing on */
+		clock_kg(ctx->r, ctx->s, 1, 1&(key_bytes[n/8]>>(7-(n%8))));
+	}
+	for(n=0; n<160; ++n){        /* 160 further clocks with a zero input bit */
+		clock_kg(ctx->r, ctx->s, 1, 0);
+	}
+}
+
+/* Return the next keystream bit (bit 0 of r[0] ^ s[0]) and advance the state by one clock. */
+uint8_t mickey128_getbit(mickey128_ctx_t* ctx){
+	const uint8_t bit = (ctx->r[0] ^ ctx->s[0]) & 1; /* sampled before clocking */
+	clock_kg(ctx->r, ctx->s, 0, 0);
+	return bit;
+}
+
+/* Return the next 8 keystream bits packed into one byte, first bit in bit 7. */
+uint8_t mickey128_getbyte(mickey128_ctx_t* ctx){
+	uint8_t out=0, remaining=8;
+	while(remaining--){
+		/* earlier bits move up as each new bit enters at bit 0 */
+		out = (uint8_t)(out<<1) | mickey128_getbit(ctx);
+	}
+	return out;
+}
+
+		
diff --git a/mickey128/mickey128.h b/mickey128/mickey128.h
new file mode 100644
index 0000000..9a125f4
--- /dev/null
+++ b/mickey128/mickey128.h
@@ -0,0 +1,19 @@
+#ifndef MICKEY128_H_
+#define MICKEY128_H_
+
+#include <stdint.h>
+
+typedef struct mickey128_ctx_st{ /* cipher state passed to every mickey128_* function */
+	uint8_t r[20]; /* register clocked by clock_r in mickey128.c */
+	uint8_t s[20]; /* register clocked by clock_s in mickey128.c */
+} mickey128_ctx_t;
+/* NOTE(review): debug_clock_r/debug_clock_s have no definition in mickey128.c - confirm they are still needed. */
+void debug_clock_r(void);
+void debug_clock_s(void);
+void mickey128_init(void* key, uint16_t keysize_b, /* sizes count bits; bytes are read MSB first */
+                    void* iv,  uint16_t ivsize_b, 
+                    mickey128_ctx_t* ctx);
+uint8_t mickey128_getbit(mickey128_ctx_t* ctx);  /* next keystream bit, returned in bit 0 */
+uint8_t mickey128_getbyte(mickey128_ctx_t* ctx); /* next 8 keystream bits, first bit in bit 7 */
+
+#endif /*MICKEY128_H_*/
diff --git a/mkfiles/arcfour.mk b/mkfiles/arcfour.mk
index d62c144..ebc6858 100644
--- a/mkfiles/arcfour.mk
+++ b/mkfiles/arcfour.mk
@@ -4,6 +4,7 @@ ALGO_NAME := ARCFOUR
 # comment out the following line for removement of ARCFOUR from the build process
 STREAM_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := arcfour/
 $(ALGO_NAME)_OBJ      := arcfour-asm.o
 $(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD)  \
                          nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/arcfour_c.mk b/mkfiles/arcfour_c.mk
index 40a3a84..5bee9f0 100644
--- a/mkfiles/arcfour_c.mk
+++ b/mkfiles/arcfour_c.mk
@@ -4,10 +4,9 @@ ALGO_NAME := ARCFOUR_C
 # comment out the following line for removement of ARCFOUR from the build process
 STREAM_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := arcfour/
 $(ALGO_NAME)_OBJ      := arcfour.o
-$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD)  \
-                         nessie_stream_test.o nessie_common.o \
-                         performance_test.o
+$(ALGO_NAME)_TEST_BIN := main-arcfour-test.o $(CLI_STD) nessie_stream_test.o nessie_common.o performance_test.o
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
 $(ALGO_NAME)_PERFORMANCE_TEST := "performance"
 
diff --git a/mkfiles/base64.mk b/mkfiles/base64.mk
index 58883c3..013b7ee 100644
--- a/mkfiles/base64.mk
+++ b/mkfiles/base64.mk
@@ -4,7 +4,7 @@ ALGO_NAME := BASE64
 # comment out the following line for removement of base64 from the build process
 ENCODINGS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := base64/
 $(ALGO_NAME)_OBJ      := base64_enc.o base64_dec.o
 $(ALGO_NAME)_TEST_BIN := main-base64-test.o $(CLI_STD)  \
                          performance_test.o noekeon_asm.o noekeon_prng.o memxor.o
diff --git a/mkfiles/bmw_c.mk b/mkfiles/bmw_c.mk
index 6305932..585bbb2 100644
--- a/mkfiles/bmw_c.mk
+++ b/mkfiles/bmw_c.mk
@@ -4,7 +4,7 @@ ALGO_NAME := BMW_C
 # comment out the following line for removement of BlueMidnightWish from the build process
 HASHES += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := bmw/
 $(ALGO_NAME)_OBJ      := bmw_small.o bmw_large.o
 $(ALGO_NAME)_TEST_BIN := main-bmw-test.o hfal_bmw_small.o hfal_bmw_large.o $(CLI_STD) $(HFAL_STD)
 $(ALGO_NAME)_NESSIE_TEST      := test nessie
diff --git a/mkfiles/cast5.mk b/mkfiles/cast5.mk
index 08ead6e..318a0e5 100644
--- a/mkfiles/cast5.mk
+++ b/mkfiles/cast5.mk
@@ -4,6 +4,7 @@ ALGO_NAME := CAST5
 # comment out the following line for removement of CAST5 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := cast5/
 $(ALGO_NAME)_OBJ      := cast5.o
 $(ALGO_NAME)_TEST_BIN := main-cast5-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/cast6.mk b/mkfiles/cast6.mk
index 3f8539f..f28800a 100644
--- a/mkfiles/cast6.mk
+++ b/mkfiles/cast6.mk
@@ -4,7 +4,7 @@ ALGO_NAME := CAST6
 # comment out the following line for removement of CAST6 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := cast6/
 $(ALGO_NAME)_OBJ      := cast6.o
 $(ALGO_NAME)_TEST_BIN := main-cast6-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/des.mk b/mkfiles/des.mk
index 5d9540d..180d9e1 100644
--- a/mkfiles/des.mk
+++ b/mkfiles/des.mk
@@ -4,6 +4,7 @@ ALGO_NAME := DES
 # comment out the following line for removement of DES from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := des/
 $(ALGO_NAME)_OBJ      := des.o
 $(ALGO_NAME)_TEST_BIN := main-des-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/entropium.mk b/mkfiles/entropium.mk
index 02ad75b..e87b3de 100644
--- a/mkfiles/entropium.mk
+++ b/mkfiles/entropium.mk
@@ -4,6 +4,7 @@ ALGO_NAME := ENTROPIUM
 # comment out the following line for removement of PRNG from the build process
 PRNGS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := entropium/
 $(ALGO_NAME)_OBJ      := entropium.o sha256-asm.o
 $(ALGO_NAME)_TEST_BIN := main-entropium-test.o $(CLI_STD) performance_test.o
                          
diff --git a/mkfiles/grain.mk b/mkfiles/grain.mk
index 1b0da65..5e6638e 100644
--- a/mkfiles/grain.mk
+++ b/mkfiles/grain.mk
@@ -4,6 +4,7 @@ ALGO_NAME := GRAIN
 # comment out the following line for removement of Grain from the build process
 STREAM_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := grain/
 $(ALGO_NAME)_OBJ      := grain.o
 $(ALGO_NAME)_TEST_BIN := main-grain-test.o $(CLI_STD) \
                          nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/hmac-md5.mk b/mkfiles/hmac-md5.mk
index 7d3f644..fd23627 100644
--- a/mkfiles/hmac-md5.mk
+++ b/mkfiles/hmac-md5.mk
@@ -4,9 +4,10 @@ ALGO_NAME := HMAC-MD5
 # comment out the following line for removement of HMAC-MD5 from the build process
 MACS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := hmac-md5/
 $(ALGO_NAME)_OBJ      := hmac-md5.o md5-asm.o
 $(ALGO_NAME)_TEST_BIN := main-hmac-md5-test.o $(CLI_STD) \
-                         nessie_mac_test.o nessie_common.o base64_enc.o base64_dec.o
+                         nessie_mac_test.o nessie_common.o
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
 $(ALGO_NAME)_PERFORMANCE_TEST := "performance"
 
diff --git a/mkfiles/hmac-sha1.mk b/mkfiles/hmac-sha1.mk
index 9087400..40958a7 100644
--- a/mkfiles/hmac-sha1.mk
+++ b/mkfiles/hmac-sha1.mk
@@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA1
 # comment out the following line for removement of HMAC-SHA1 from the build process
 MACS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := hmac-sha1/
 $(ALGO_NAME)_OBJ      := hmac-sha1.o sha1-asm.o
 $(ALGO_NAME)_TEST_BIN := main-hmac-sha1-test.o $(CLI_STD) \
                          nessie_mac_test.o nessie_common.o
diff --git a/mkfiles/hmac-sha256.mk b/mkfiles/hmac-sha256.mk
index ba221ab..4b25ea9 100644
--- a/mkfiles/hmac-sha256.mk
+++ b/mkfiles/hmac-sha256.mk
@@ -4,6 +4,7 @@ ALGO_NAME := HMAC-SHA256
 # comment out the following line for removement of HMAC-SHA256 from the build process
 MACS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := hmac-sha256/
 $(ALGO_NAME)_OBJ      := hmac-sha256.o sha256-asm.o
 $(ALGO_NAME)_TEST_BIN := main-hmac-sha256-test.o $(CLI_STD) \
                          nessie_mac_test.o nessie_common.o
diff --git a/mkfiles/md5.mk b/mkfiles/md5.mk
index 025e9eb..bebdaa3 100644
--- a/mkfiles/md5.mk
+++ b/mkfiles/md5.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MD5
 # comment out the following line for removement of MD5 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := md5/
 $(ALGO_NAME)_OBJ      := md5-asm.o
 $(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD)
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
diff --git a/mkfiles/md5_c.mk b/mkfiles/md5_c.mk
index 7b6bb1a..d7421e7 100644
--- a/mkfiles/md5_c.mk
+++ b/mkfiles/md5_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MD5_C
 # comment out the following line for removement of MD5 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := md5/
 $(ALGO_NAME)_OBJ      := md5.o
 $(ALGO_NAME)_TEST_BIN := main-md5-test.o hfal_md5.o $(CLI_STD) $(HFAL_STD)
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
diff --git a/mkfiles/mickey128.mk b/mkfiles/mickey128.mk
index 6c95c96..c7bc17d 100644
--- a/mkfiles/mickey128.mk
+++ b/mkfiles/mickey128.mk
@@ -4,6 +4,7 @@ ALGO_NAME := MICKEY128
 # comment out the following line for removement of Mickey128 from the build process
 STREAM_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := mickey128/
 $(ALGO_NAME)_OBJ      := mickey128.o
 $(ALGO_NAME)_TEST_BIN := main-mickey128-test.o $(CLI_STD) \
                          nessie_stream_test.o nessie_common.o
diff --git a/mkfiles/present.mk b/mkfiles/present.mk
index 3a4012c..3c73f82 100644
--- a/mkfiles/present.mk
+++ b/mkfiles/present.mk
@@ -4,7 +4,7 @@ ALGO_NAME := PRESENT
 # comment out the following line for removement of present from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := present/
 $(ALGO_NAME)_OBJ      := present.o
 $(ALGO_NAME)_TEST_BIN := main-present-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/rc5.mk b/mkfiles/rc5.mk
index 3a5f128..be82175 100644
--- a/mkfiles/rc5.mk
+++ b/mkfiles/rc5.mk
@@ -4,7 +4,7 @@ ALGO_NAME := RC5
 # comment out the following line for removement of RC5 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := rc5/
 $(ALGO_NAME)_OBJ      := rc5.o
 $(ALGO_NAME)_TEST_BIN := main-rc5-test.o $(CLI_STD) nessie_bc_test.o \
                          nessie_common.o performance_test.o
diff --git a/mkfiles/rc6.mk b/mkfiles/rc6.mk
index e0bc603..a58b138 100644
--- a/mkfiles/rc6.mk
+++ b/mkfiles/rc6.mk
@@ -4,7 +4,7 @@ ALGO_NAME := RC6
 # comment out the following line for removement of RC6 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := rc6/
 $(ALGO_NAME)_OBJ      := rc6.o
 $(ALGO_NAME)_TEST_BIN := main-rc6-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/seed.mk b/mkfiles/seed.mk
index 1c2b605..07ceb3e 100644
--- a/mkfiles/seed.mk
+++ b/mkfiles/seed.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SEED
 # comment out the following line for removement of SEED from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := seed/
 $(ALGO_NAME)_OBJ      := seed-asm.o
 $(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/seed_C.mk b/mkfiles/seed_C.mk
index 14aa8ac..7c77666 100644
--- a/mkfiles/seed_C.mk
+++ b/mkfiles/seed_C.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SEED_C
 # comment out the following line for removement of SEED from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := seed/
 $(ALGO_NAME)_OBJ      := seed_C.o
 $(ALGO_NAME)_TEST_BIN := main-seed-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent-bitslice.mk b/mkfiles/serpent-bitslice.mk
index 6a9b76a..5719143 100644
--- a/mkfiles/serpent-bitslice.mk
+++ b/mkfiles/serpent-bitslice.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_BITSLICE
 # comment out the following line for removement of serpent from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := serpent/
 $(ALGO_NAME)_OBJ      := serpent-asm.o serpent-sboxes-bitslice-asm.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_bitslice.mk b/mkfiles/serpent_asm_bitslice.mk
index afd0868..a5956c3 100644
--- a/mkfiles/serpent_asm_bitslice.mk
+++ b/mkfiles/serpent_asm_bitslice.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_BITSLICE
 # comment out the following line for removement of serpent from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := serpent/
 $(ALGO_NAME)_OBJ      := serpent-sboxes-bitslice-asm.o serpent-asm.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_fast.mk b/mkfiles/serpent_asm_fast.mk
index 3e3a4fb..d9ff725 100644
--- a/mkfiles/serpent_asm_fast.mk
+++ b/mkfiles/serpent_asm_fast.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_FAST
 # comment out the following line for removement of serpent from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := serpent/
 $(ALGO_NAME)_OBJ      := serpent-asm.o serpent-sboxes-fast.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_asm_small.mk b/mkfiles/serpent_asm_small.mk
index 6dde94b..4d6750e 100644
--- a/mkfiles/serpent_asm_small.mk
+++ b/mkfiles/serpent_asm_small.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_ASM_SMALL
 # comment out the following line for removement of serpent from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := serpent/
 $(ALGO_NAME)_OBJ      := serpent-asm.o serpent-sboxes-small.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/serpent_c.mk b/mkfiles/serpent_c.mk
index dd3a69b..f52ced4 100644
--- a/mkfiles/serpent_c.mk
+++ b/mkfiles/serpent_c.mk
@@ -4,7 +4,7 @@ ALGO_NAME := SERPENT_C
 # comment out the following line for removement of serpent from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := serpent/
 $(ALGO_NAME)_OBJ      := serpent.o serpent-sboxes_c.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-serpent-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/sha1.mk b/mkfiles/sha1.mk
index 45df051..c986aeb 100644
--- a/mkfiles/sha1.mk
+++ b/mkfiles/sha1.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA1
 # comment out the following line for removement of SHA1 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := sha1/
 $(ALGO_NAME)_OBJ      := sha1-asm.o
 $(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o $(CLI_STD) $(HFAL_STD) dump-decl.o dump-asm.o 
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
diff --git a/mkfiles/sha1_c.mk b/mkfiles/sha1_c.mk
index 4b0b7ae..6998cbc 100644
--- a/mkfiles/sha1_c.mk
+++ b/mkfiles/sha1_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA1_C
 # comment out the following line for removement of SHA1 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := sha1/
 $(ALGO_NAME)_OBJ      := sha1.o
 $(ALGO_NAME)_TEST_BIN := main-sha1-test.o hfal_sha1.o dump-asm.o dump-decl.o $(CLI_STD) $(HFAL_STD)
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
diff --git a/mkfiles/sha256.mk b/mkfiles/sha256.mk
index 93ebdc7..9e56ed6 100644
--- a/mkfiles/sha256.mk
+++ b/mkfiles/sha256.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA256
 # comment out the following line for removement of SHA256 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := sha256/
 $(ALGO_NAME)_OBJ      := sha256-asm.o
 $(ALGO_NAME)_TEST_BIN := main-sha256-test.o dump-asm.o dump-decl.o hfal_sha256.o $(CLI_STD) $(HFAL_STD)
 			
diff --git a/mkfiles/sha256_c.mk b/mkfiles/sha256_c.mk
index d52fe88..6c58677 100644
--- a/mkfiles/sha256_c.mk
+++ b/mkfiles/sha256_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHA256_C
 # comment out the following line for removement of SHA256 from the build process
 HASHES += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := sha256/
 $(ALGO_NAME)_OBJ      := sha256.o
 $(ALGO_NAME)_TEST_BIN := main-sha256-test.o $(CLI_STD) $(HFAL_STD) hfal_sha256.o dump-asm.o dump-decl.o
 $(ALGO_NAME)_NESSIE_TEST      := "nessie"
diff --git a/mkfiles/shabea.mk b/mkfiles/shabea.mk
index e5f4a68..9c05cb3 100644
--- a/mkfiles/shabea.mk
+++ b/mkfiles/shabea.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SHABEA
 # comment out the following line for removement of SHABEA from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := shabea/
 $(ALGO_NAME)_OBJ      := shabea.o sha256-asm.o memxor.o
 $(ALGO_NAME)_TEST_BIN := main-shabea-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/shacal1enc.mk b/mkfiles/shacal1enc.mk
index 63be6c8..3003638 100644
--- a/mkfiles/shacal1enc.mk
+++ b/mkfiles/shacal1enc.mk
@@ -1,10 +1,10 @@
-# Makefile for present
+# Makefile for shacal1
 ALGO_NAME := SHACAL1ENC
 
-# comment out the following line for removement of present from the build process
+# comment out the following line to remove shacal1 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := shacal1/
 $(ALGO_NAME)_OBJ      := shacal1_enc.o sha1-asm.o
 $(ALGO_NAME)_TEST_BIN := main-shacal1_enc-test.o $(CLI_STD)  \
                          nessie_bc_test.o nessie_common.o performance_test.o 
diff --git a/mkfiles/shacal2enc.mk b/mkfiles/shacal2enc.mk
index e8a91a2..d5f0d9d 100644
--- a/mkfiles/shacal2enc.mk
+++ b/mkfiles/shacal2enc.mk
@@ -1,10 +1,10 @@
-# Makefile for present
+# Makefile for shacal2
 ALGO_NAME := SHACAL2ENC
 
-# comment out the following line for removement of present from the build process
+# comment out the following line to remove shacal2 from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
-
+$(ALGO_NAME)_DIR      := shacal2/
 $(ALGO_NAME)_OBJ      := shacal2_enc.o sha256-asm.o
 $(ALGO_NAME)_TEST_BIN := main-shacal2_enc-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o 
diff --git a/mkfiles/skipjack.mk b/mkfiles/skipjack.mk
index 56579b2..ef859df 100644
--- a/mkfiles/skipjack.mk
+++ b/mkfiles/skipjack.mk
@@ -4,6 +4,7 @@ ALGO_NAME := SKIPJACK
 # comment out the following line for removement of skipjack from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := skipjack/
 $(ALGO_NAME)_OBJ      := skipjack.o
 $(ALGO_NAME)_TEST_BIN := main-skipjack-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/tdes.mk b/mkfiles/tdes.mk
index 7bdc41e..09bb96c 100644
--- a/mkfiles/tdes.mk
+++ b/mkfiles/tdes.mk
@@ -1,9 +1,10 @@
-# Makefile for DES
+# Makefile for Triple-DES
 ALGO_NAME := TDES
 
-# comment out the following line for removement of DES from the build process
+# comment out the following line for removement of Triple-DES from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := des/
 $(ALGO_NAME)_OBJ      := des.o
 $(ALGO_NAME)_TEST_BIN := main-tdes-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/trivium.mk b/mkfiles/trivium.mk
index bbfba3b..a668ee7 100644
--- a/mkfiles/trivium.mk
+++ b/mkfiles/trivium.mk
@@ -4,6 +4,7 @@ ALGO_NAME := TRIVIUM
 # comment out the following line for removement of Trivium from the build process
 STREAM_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := trivium/
 $(ALGO_NAME)_OBJ      := trivium.o
 $(ALGO_NAME)_TEST_BIN := main-trivium-test.o $(CLI_STD) \
                          nessie_stream_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/xtea.mk b/mkfiles/xtea.mk
index 68adcfb..f2d1169 100644
--- a/mkfiles/xtea.mk
+++ b/mkfiles/xtea.mk
@@ -4,6 +4,7 @@ ALGO_NAME := XTEA
 # comment out the following line for removement of XTEA from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := xtea/
 $(ALGO_NAME)_OBJ      := xtea-asm.o
 $(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/mkfiles/xtea_c.mk b/mkfiles/xtea_c.mk
index dec8f8c..5bbd680 100644
--- a/mkfiles/xtea_c.mk
+++ b/mkfiles/xtea_c.mk
@@ -4,6 +4,7 @@ ALGO_NAME := XTEA_C
 # comment out the following line for removement of XTEA from the build process
 BLOCK_CIPHERS += $(ALGO_NAME)
 
+$(ALGO_NAME)_DIR      := xtea/
 $(ALGO_NAME)_OBJ      := xtea.o
 $(ALGO_NAME)_TEST_BIN := main-xtea-test.o $(CLI_STD) \
                          nessie_bc_test.o nessie_common.o performance_test.o
diff --git a/present.c b/present.c
deleted file mode 100644
index 03792c5..0000000
--- a/present.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/* present.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * present.c
- * a implementation of the PRESENT block-cipher
- * author: Daniel Otte
- * email:  daniel.otte@rub.de
- * license: GPLv3
- * 
- * */
- 
-#include <string.h>
-#include <stdint.h> 
-#include "present.h"
-
-static uint8_t sbox(uint8_t b){
-	uint8_t sb[]={0xC, 0x5, 0x6, 0xB, 
-		 		  0x9, 0x0, 0xA, 0xD, 
-				  0x3, 0xE, 0xF, 0x8, 
-				  0x4, 0x7, 0x1, 0x2 };
-	return (((sb[b>>4])<<4)|(sb[b&0xf]));
-}
-
-static uint8_t sbox_inv(uint8_t b){
-	uint8_t sb[]={0x5, 0xE, 0xF, 0x8, 
-				  0xC, 0x1, 0x2, 0xD, 
-				  0xB, 0x4, 0x6, 0x3, 
-				  0x0, 0x7, 0x9, 0xA };
-	return (((sb[b>>4])<<4)|(sb[b&0xf]));
-}
-
-#define SHR_O(a) c=(a)&1; (a)>>=1;
-#define SHR_I(a) (a)=(c?0x8000:0x0000) | ((a)>>1);
-
-static void p(uint16_t* o, uint8_t* i){
-	uint8_t c;
-	uint8_t m,n;
-	for(m=0; m<8; ++m){
-		for(n=0; n<2; ++n){
-			SHR_O(i[m]);
-			SHR_I(o[0]);
-			SHR_O(i[m]);
-			SHR_I(o[1]);
-			SHR_O(i[m]);
-			SHR_I(o[2]);
-			SHR_O(i[m]);
-			SHR_I(o[3]);
-		}
-	}
-}
-
-static void p_inv(uint8_t* o, uint8_t* i){
-	uint8_t tmp[8];
-	p((uint16_t*)tmp, i);
-	p((uint16_t*)o, tmp);
-}
-
-void present_init(const uint8_t* key, uint8_t keysize_b, present_ctx_t* ctx){
-	uint8_t buffer[10], tmp[2];
-	uint8_t i;
-	memcpy(buffer, key, 10);
-	memcpy(&(ctx->k[0]), buffer+2, 8);
-	for(i=1; i<32; ++i){
-		/* rotate buffer 19 right */
-		memcpy(tmp, buffer, 2);
-		memmove(buffer, buffer+2, 8);
-		memcpy(buffer+8, tmp, 2);
-		 /* three shifts to do*/
-		tmp[1]=buffer[0];
-		*((uint64_t*)buffer)>>=3;
-		*((uint16_t*)(buffer+8))>>=3;
-		buffer[9] |= tmp[1]<<5;
-		buffer[7] |= tmp[0]<<5;
-		/* rotating done now substitution */
-		buffer[9] = (sbox(buffer[9])&0xF0) | ((buffer[9])&0x0F);
-		/* xor with round counter */
-		*((uint16_t*)(buffer+1)) ^= (uint16_t)i<<7;
-		memcpy(&(ctx->k[i]), buffer+2, 8);
-	}
-}
-
-void present_enc(void* buffer, present_ctx_t* ctx){
-	uint8_t i,j,tmp[8];
-	for(i=0; i<31; ++i){
-		*((uint64_t*)buffer) ^= ctx->k[i];
-		 for(j=0; j<8; ++j){
-		 	tmp[j] = sbox(((uint8_t*)buffer)[j]);
-		 }
-		 p((uint16_t*)buffer, tmp);
-	}
-	*((uint64_t*)buffer) ^= ctx->k[31];
-}
-
-
-void present_dec(void* buffer, present_ctx_t* ctx){
-	uint8_t j,tmp[8];
-	int8_t i;
-	*((uint64_t*)buffer) ^= ctx->k[31];
-
-	for(i=30; i>=0; --i){ 
-		p_inv(tmp, (uint8_t*)buffer);
-		for(j=0; j<8; ++j){
-		 	((uint8_t*)buffer)[j] = sbox_inv(tmp[j]);
-		}
-		*((uint64_t*)buffer) ^= ctx->k[i];
-	}
-}
diff --git a/present.h b/present.h
deleted file mode 100644
index 320a1d8..0000000
--- a/present.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* present.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef PRESENT_H_
-#define PRESENT_H_
-
-#include <stdint.h>
-
-typedef struct present_ctx_st{
-	uint64_t k[32];
-} present_ctx_t;
-
-
-void present_init(const uint8_t* key, uint8_t keysize_b, present_ctx_t* ctx);
-void present_enc(void* buffer, present_ctx_t* ctx);
-void present_dec(void* buffer, present_ctx_t* ctx);
-
-
-#endif /*PRESENT_H_*/
diff --git a/present/present.c b/present/present.c
new file mode 100644
index 0000000..03792c5
--- /dev/null
+++ b/present/present.c
@@ -0,0 +1,123 @@
+/* present.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * present.c
+ * an implementation of the PRESENT block-cipher
+ * author: Daniel Otte
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ * 
+ * */
+ 
+#include <string.h>
+#include <stdint.h> 
+#include "present.h"
+
+static uint8_t sbox(uint8_t b){
+	uint8_t sb[]={0xC, 0x5, 0x6, 0xB, 
+		 		  0x9, 0x0, 0xA, 0xD, 
+				  0x3, 0xE, 0xF, 0x8, 
+				  0x4, 0x7, 0x1, 0x2 };
+	return (((sb[b>>4])<<4)|(sb[b&0xf]));
+}
+
+static uint8_t sbox_inv(uint8_t b){
+	uint8_t sb[]={0x5, 0xE, 0xF, 0x8, 
+				  0xC, 0x1, 0x2, 0xD, 
+				  0xB, 0x4, 0x6, 0x3, 
+				  0x0, 0x7, 0x9, 0xA };
+	return (((sb[b>>4])<<4)|(sb[b&0xf]));
+}
+
+#define SHR_O(a) c=(a)&1; (a)>>=1;
+#define SHR_I(a) (a)=(c?0x8000:0x0000) | ((a)>>1);
+
+static void p(uint16_t* o, uint8_t* i){
+	uint8_t c;
+	uint8_t m,n;
+	for(m=0; m<8; ++m){
+		for(n=0; n<2; ++n){
+			SHR_O(i[m]);
+			SHR_I(o[0]);
+			SHR_O(i[m]);
+			SHR_I(o[1]);
+			SHR_O(i[m]);
+			SHR_I(o[2]);
+			SHR_O(i[m]);
+			SHR_I(o[3]);
+		}
+	}
+}
+
+static void p_inv(uint8_t* o, uint8_t* i){
+	uint8_t tmp[8];
+	p((uint16_t*)tmp, i);
+	p((uint16_t*)o, tmp);
+}
+
+void present_init(const uint8_t* key, uint8_t keysize_b, present_ctx_t* ctx){
+	uint8_t buffer[10], tmp[2];
+	uint8_t i;
+	memcpy(buffer, key, 10);
+	memcpy(&(ctx->k[0]), buffer+2, 8);
+	for(i=1; i<32; ++i){
+		/* rotate buffer 19 right */
+		memcpy(tmp, buffer, 2);
+		memmove(buffer, buffer+2, 8);
+		memcpy(buffer+8, tmp, 2);
+		 /* three shifts to do*/
+		tmp[1]=buffer[0];
+		*((uint64_t*)buffer)>>=3;
+		*((uint16_t*)(buffer+8))>>=3;
+		buffer[9] |= tmp[1]<<5;
+		buffer[7] |= tmp[0]<<5;
+		/* rotation done, now the substitution */
+		buffer[9] = (sbox(buffer[9])&0xF0) | ((buffer[9])&0x0F);
+		/* xor with round counter */
+		*((uint16_t*)(buffer+1)) ^= (uint16_t)i<<7;
+		memcpy(&(ctx->k[i]), buffer+2, 8);
+	}
+}
+
+void present_enc(void* buffer, present_ctx_t* ctx){
+	uint8_t i,j,tmp[8];
+	for(i=0; i<31; ++i){
+		*((uint64_t*)buffer) ^= ctx->k[i];
+		 for(j=0; j<8; ++j){
+		 	tmp[j] = sbox(((uint8_t*)buffer)[j]);
+		 }
+		 p((uint16_t*)buffer, tmp);
+	}
+	*((uint64_t*)buffer) ^= ctx->k[31];
+}
+
+
+void present_dec(void* buffer, present_ctx_t* ctx){
+	uint8_t j,tmp[8];
+	int8_t i;
+	*((uint64_t*)buffer) ^= ctx->k[31];
+
+	for(i=30; i>=0; --i){ 
+		p_inv(tmp, (uint8_t*)buffer);
+		for(j=0; j<8; ++j){
+		 	((uint8_t*)buffer)[j] = sbox_inv(tmp[j]);
+		}
+		*((uint64_t*)buffer) ^= ctx->k[i];
+	}
+}
diff --git a/present/present.h b/present/present.h
new file mode 100644
index 0000000..320a1d8
--- /dev/null
+++ b/present/present.h
@@ -0,0 +1,34 @@
+/* present.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef PRESENT_H_
+#define PRESENT_H_
+
+#include <stdint.h>
+
+typedef struct present_ctx_st{
+	uint64_t k[32];
+} present_ctx_t;
+
+
+void present_init(const uint8_t* key, uint8_t keysize_b, present_ctx_t* ctx);
+void present_enc(void* buffer, present_ctx_t* ctx);
+void present_dec(void* buffer, present_ctx_t* ctx);
+
+
+#endif /*PRESENT_H_*/
diff --git a/rc5.c b/rc5.c
deleted file mode 100644
index 441f61d..0000000
--- a/rc5.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* rc5.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* rc5.c a C implementation of RC5 for AVR microcontrollers
- * 
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- * 
- * this implementation is limited to 64bit blocks and a maximum of 255 rounds
- * 
- */
-
-#include <stdint.h>
-#include <stdlib.h> /* malloc() & free() */
-#include <string.h> /* memset() & memcpy() */
-#include "rc5.h" 
- 
-
-#define A (((uint32_t*)buffer)[0])
-#define B (((uint32_t*)buffer)[1])
-#define ROTL32(v,n) (((v)<<(n))|((v)>>(32-(n))))
-#define ROTR32(v,n) (((v)>>(n))|((v)<<(32-(n))))
-
-void rc5_enc(void* buffer, const rc5_ctx_t* ctx){
-	uint8_t i;
-	A += ctx->s[0];
-	B += ctx->s[1];
-	for(i=0; i<ctx->rounds; ++i){
-		A = ROTL32(A^B, B&31) + ctx->s[(i+1)*2+0];
-		B = ROTL32(A^B, A&31) + ctx->s[(i+1)*2+1];
-	} 
-}
-
-void rc5_dec(void* buffer, const rc5_ctx_t* ctx){
-	uint8_t i;
-	for(i=ctx->rounds; i>0; --i){
-		B = ROTR32(B - ctx->s[i*2+1], A&31) ^ A;
-		A = ROTR32(A - ctx->s[i*2+0], B&31) ^ B;
-	} 
-	B -= ctx->s[1];
-	A -= ctx->s[0];
-}
-/*
-P32 = 10110111111000010101000101100011 = b7e15163
-Q32 = 10011110001101110111100110111001 = 9e3779b9
-*/
-#define P32 0xb7e15163
-#define Q32 0x9e3779b9
-
-
-void rc5_init(void* key, uint16_t keysize_b, uint8_t rounds, rc5_ctx_t* ctx){
-	uint16_t c,n,m,j,i,t;
-	uint32_t a,b,l[(keysize_b+31)/32];
-	ctx->rounds = rounds;
-	t=2*(rounds+1);
-	c=(keysize_b+31)/32;
-	ctx->s = malloc(t*sizeof(uint32_t));
-	
-	memset(l, 0, sizeof(uint32_t)*c);
-	memcpy(l, key, (keysize_b+7)/8);
-	
-	ctx->s[0] = P32;
-	for(i=1; i<t; ++i){
-		ctx->s[i] = ctx->s[i-1] + Q32;
-	}
-	
-	m = ((t>c)?t:c)*3;
-	i=j=0;
-	a=b=0;
-	for(n=0; n<m; ++n){
-		a=ctx->s[i]=ROTL32(ctx->s[i]+a+b, 3);
-		b=l[j]=ROTL32(l[j]+a+b, (a+b)&31);
-		i=(i+1)%t;
-		j=(j+1)%c;
-	}
-}
-
-void rc5_free(rc5_ctx_t* ctx){
-	if(ctx->s)
-		free(ctx->s);
-}
-
diff --git a/rc5.h b/rc5.h
deleted file mode 100644
index 2a0182c..0000000
--- a/rc5.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* rc5.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* rc5.h a C implementation of RC5 for AVR microcontrollers
- * 
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- * 
- * this implementation is limited to 64bit blocks and a maximum of 255 rounds
- * 
- */
-#ifndef RC5_H_
-#define RC5_H_
-
-
-#include <stdint.h>
-#include <stdlib.h> /* malloc() & free() */
-#include <string.h> /* memset() & memcpy() */
- 
-typedef struct rc5_ctx_st {
-	uint8_t rounds;
-	uint32_t *s;
-}rc5_ctx_t; 
-
-void rc5_enc(void* buffer, const rc5_ctx_t* ctx);
-void rc5_dec(void* buffer, const rc5_ctx_t* ctx);
-void rc5_init(void* key, uint16_t keysize_b, uint8_t rounds, rc5_ctx_t* ctx);
-void rc5_free(rc5_ctx_t* ctx);
-
-#endif /*RC5_H_*/
diff --git a/rc5/rc5.c b/rc5/rc5.c
new file mode 100644
index 0000000..441f61d
--- /dev/null
+++ b/rc5/rc5.c
@@ -0,0 +1,98 @@
+/* rc5.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* rc5.c a C implementation of RC5 for AVR microcontrollers
+ * 
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ * 
+ * this implementation is limited to 64bit blocks and a maximum of 255 rounds
+ * 
+ */
+
+#include <stdint.h>
+#include <stdlib.h> /* malloc() & free() */
+#include <string.h> /* memset() & memcpy() */
+#include "rc5.h" 
+ 
+
+#define A (((uint32_t*)buffer)[0])
+#define B (((uint32_t*)buffer)[1])
+#define ROTL32(v,n) (((v)<<(n))|((v)>>(32-(n))))
+#define ROTR32(v,n) (((v)>>(n))|((v)<<(32-(n))))
+
+void rc5_enc(void* buffer, const rc5_ctx_t* ctx){
+	uint8_t i;
+	A += ctx->s[0];
+	B += ctx->s[1];
+	for(i=0; i<ctx->rounds; ++i){
+		A = ROTL32(A^B, B&31) + ctx->s[(i+1)*2+0];
+		B = ROTL32(A^B, A&31) + ctx->s[(i+1)*2+1];
+	} 
+}
+
+void rc5_dec(void* buffer, const rc5_ctx_t* ctx){
+	uint8_t i;
+	for(i=ctx->rounds; i>0; --i){
+		B = ROTR32(B - ctx->s[i*2+1], A&31) ^ A;
+		A = ROTR32(A - ctx->s[i*2+0], B&31) ^ B;
+	} 
+	B -= ctx->s[1];
+	A -= ctx->s[0];
+}
+/*
+P32 = 10110111111000010101000101100011 = b7e15163
+Q32 = 10011110001101110111100110111001 = 9e3779b9
+*/
+#define P32 0xb7e15163
+#define Q32 0x9e3779b9
+
+
+void rc5_init(void* key, uint16_t keysize_b, uint8_t rounds, rc5_ctx_t* ctx){
+	uint16_t c,n,m,j,i,t;
+	uint32_t a,b,l[(keysize_b+31)/32];
+	ctx->rounds = rounds;
+	t=2*(rounds+1);
+	c=(keysize_b+31)/32;
+	ctx->s = malloc(t*sizeof(uint32_t));
+	
+	memset(l, 0, sizeof(uint32_t)*c);
+	memcpy(l, key, (keysize_b+7)/8);
+	
+	ctx->s[0] = P32;
+	for(i=1; i<t; ++i){
+		ctx->s[i] = ctx->s[i-1] + Q32;
+	}
+	
+	m = ((t>c)?t:c)*3;
+	i=j=0;
+	a=b=0;
+	for(n=0; n<m; ++n){
+		a=ctx->s[i]=ROTL32(ctx->s[i]+a+b, 3);
+		b=l[j]=ROTL32(l[j]+a+b, (a+b)&31);
+		i=(i+1)%t;
+		j=(j+1)%c;
+	}
+}
+
+void rc5_free(rc5_ctx_t* ctx){
+	if(ctx->s)
+		free(ctx->s);
+}
+
diff --git a/rc5/rc5.h b/rc5/rc5.h
new file mode 100644
index 0000000..2a0182c
--- /dev/null
+++ b/rc5/rc5.h
@@ -0,0 +1,46 @@
+/* rc5.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* rc5.h a C implementation of RC5 for AVR microcontrollers
+ * 
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ * 
+ * this implementation is limited to 64bit blocks and a maximum of 255 rounds
+ * 
+ */
+#ifndef RC5_H_
+#define RC5_H_
+
+
+#include <stdint.h>
+#include <stdlib.h> /* malloc() & free() */
+#include <string.h> /* memset() & memcpy() */
+ 
+typedef struct rc5_ctx_st {
+	uint8_t rounds;
+	uint32_t *s;
+}rc5_ctx_t; 
+
+void rc5_enc(void* buffer, const rc5_ctx_t* ctx);
+void rc5_dec(void* buffer, const rc5_ctx_t* ctx);
+void rc5_init(void* key, uint16_t keysize_b, uint8_t rounds, rc5_ctx_t* ctx);
+void rc5_free(rc5_ctx_t* ctx);
+
+#endif /*RC5_H_*/
diff --git a/rc6.c b/rc6.c
deleted file mode 100644
index ea4d7bb..0000000
--- a/rc6.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* rc6.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * File:	rc6.c
- * Author:	Daniel Otte
- * Date: 	06.08.2006
- * License: GPL
- * Description: Implementation of the RC6 cipher algorithm.
- * 	This implementation is restricted to 32-bit words and to keys up to 65535 bit in length (but this is
- *  quite easy to expand), but free in the choice of number of rounds (0 to 125).
- * 	so it is RC6-32/r/b
- * THIS ONLY WORKS FOR LITTEL ENDIAN!!!
- */
- 
-#include <stdint.h>
-#include <stdlib.h>
-#include "rc6.h"
-#include "config.h"
- 
-#define P32 0xB7E15163		/* e -2 */
-#define Q32 0x9E3779B9		/* Golden Ratio -1 */
- 
-uint32_t rotl32(uint32_t a, uint8_t n){
- 	n &= 0x1f; /* higher rotates would not bring anything */
- 	return ( (a<<n)| (a>>(32-n)) );
-}
-
-uint32_t rotr32(uint32_t a, uint8_t n){
- 	n &= 0x1f; /* higher rotates would not bring anything */
- 	return ( (a>>n)| (a<<(32-n)) );
-}
- 
-uint8_t rc6_init(void* key, uint16_t keylength_b, rc6_ctx_t *s){
- 	return rc6_initl(key, keylength_b, 20, s);
-}
- 
- 
-uint8_t rc6_initl(void* key, uint16_t keylength_b, uint8_t rounds, rc6_ctx_t *s){
- 	uint8_t i,j;
- 	uint16_t v,p,c;
- 	uint32_t a,b, l=0;
- 	if (rounds>125)
- 		return 2;
- 	if(!(s->S=malloc((2*rounds+4)*sizeof(uint32_t))))
- 		return 1;
- 	
- 	s->rounds=rounds;
- 	
- 	c = keylength_b/32;
- 	if (keylength_b%32){ 
- 		++c;
- 		j=(keylength_b%32)/8;
- 		if(keylength_b%8)
- 			++j;
- 		for (i=0; i<j; ++i) 
- 			((uint8_t*)&l)[i] = ((uint8_t*)key)[(c-1)*4 + i];
- 	} else {
- 		l = ((uint32_t*)key)[c-1];
- 	}
- 	
- 	s->S[0] = P32;
- 	for(i=1; i<2*rounds+4; ++i){
-		s->S[i] = s->S[i-1] + Q32;
-	}
-	
-	a=b=j=i=0;
-	v = 3 * ((c > 2*rounds+4)?c:(2*rounds+4));
-	for(p=1; p<=v; ++p){
-		a = s->S[i] = rotl32(s->S[i] + a + b, 3);
-		if (j==c-1){
-			b = l = rotl32(l+a+b, a+b);
-		} else {
-			b = ((uint32_t*)key)[j] = rotl32(((uint32_t*)key)[j]+a+b, a+b);
-		}
-		i = (i+1) % (2*rounds+4);
-		j = (j+1) % c;
-	}
-	return 0;
-}
- 
-void rc6_free(rc6_ctx_t *s){
- 	free(s->S);
-} 
- 
-#define LG_W 5
-#define A (((uint32_t*)block)[0])
-#define B (((uint32_t*)block)[1])
-#define C (((uint32_t*)block)[2])
-#define D (((uint32_t*)block)[3])
- 
-void rc6_enc(void* block, rc6_ctx_t *s){
- 	uint8_t i;
- 	uint32_t t,u,x; /* greetings to Linux? */
- 	B += s->S[0];
- 	D += s->S[1];
- 	for (i=1; i<=s->rounds; ++i){
- 		t = rotl32(B * (2*B+1), LG_W);
- 		u = rotl32(D * (2*D+1), LG_W);
- 		A = rotl32((A ^ t), u) + s->S[2*i];
- 		C = rotl32((C ^ u), t) + s->S[2*i+1];
- 		x = A;
- 		A = B;
- 		B = C;
- 		C = D;
- 		D = x;
- 	}
- 	A += s->S[2*s->rounds+2];
- 	C += s->S[2*s->rounds+3];
-}
- 
-void rc6_dec(void* block, rc6_ctx_t *s){
- 	uint8_t i;
- 	uint32_t t,u,x; /* greetings to Linux? */
- 
- 	C -= s->S[2*s->rounds+3];
- 	A -= s->S[2*s->rounds+2];
- 	
- 	for (i=s->rounds; i>0; --i){
- 		x=D;
- 		D=C;
- 		C=B;
- 		B=A;
- 		A=x;
- 		u = rotl32(D * (2*D+1), LG_W);
- 		t = rotl32(B * (2*B+1), LG_W);
- 		C = rotr32(C - s->S[2*i+1], t) ^ u;
- 		A = rotr32(A - s->S[2*i+0], u) ^ t;
- 	}
- 	D -= s->S[1];
- 	B -= s->S[0];
-}
- 
diff --git a/rc6.h b/rc6.h
deleted file mode 100644
index 429a1d3..0000000
--- a/rc6.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* rc6.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* 
- * File:	rc6.h
- * Author:	Daniel Otte
- * Date: 	06.08.2006
- * License: GPL
- * Description: Implementation of the RC6 cipher algorithm.
- * 	This implementation is restricted to 32-bit words, but free in the choice of number of rounds (0 to 255).
- * 	so it is RC6-32/r/b
- */
-
-#ifndef RC6_H_
-#define RC6_H_
-
- 
-#include <stdint.h>
- 
-typedef struct rc6_ctx_st{
-	uint8_t		rounds;		/* specifys the number of rounds; default: 20 */
- 	uint32_t*	S;			/* the round-keys */
-} rc6_ctx_t;
- 
- 
-uint8_t rc6_init(void* key, uint16_t keylength_b, rc6_ctx_t *s);
- 
-uint8_t rc6_initl(void* key, uint16_t keylength_b, uint8_t rounds, rc6_ctx_t *s);
- 
-void rc6_enc(void* block, rc6_ctx_t *s);
-
-void rc6_dec(void* block, rc6_ctx_t *s);
- 
-void rc6_free(rc6_ctx_t *s); 
-
-#endif /* RC6_H_ */
-
diff --git a/rc6/rc6.c b/rc6/rc6.c
new file mode 100644
index 0000000..ea4d7bb
--- /dev/null
+++ b/rc6/rc6.c
@@ -0,0 +1,148 @@
+/* rc6.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * File:	rc6.c
+ * Author:	Daniel Otte
+ * Date: 	06.08.2006
+ * License: GPL
+ * Description: Implementation of the RC6 cipher algorithm.
+ * 	This implementation is restricted to 32-bit words and to keys up to 65535 bit in length (but this is
+ *  quite easy to expand), but free in the choice of number of rounds (0 to 125).
+ * 	so it is RC6-32/r/b
+ * THIS ONLY WORKS FOR LITTLE ENDIAN!!!
+ */
+ 
+#include <stdint.h>
+#include <stdlib.h>
+#include "rc6.h"
+#include "config.h"
+ 
+#define P32 0xB7E15163		/* e -2 */
+#define Q32 0x9E3779B9		/* Golden Ratio -1 */
+ 
+uint32_t rotl32(uint32_t a, uint8_t n){
+ 	n &= 0x1f; /* higher rotates would not bring anything */
+ 	return ( (a<<n)| (a>>(32-n)) );
+}
+
+uint32_t rotr32(uint32_t a, uint8_t n){
+ 	n &= 0x1f; /* higher rotates would not bring anything */
+ 	return ( (a>>n)| (a<<(32-n)) );
+}
+ 
+uint8_t rc6_init(void* key, uint16_t keylength_b, rc6_ctx_t *s){
+ 	return rc6_initl(key, keylength_b, 20, s);
+}
+ 
+ 
+uint8_t rc6_initl(void* key, uint16_t keylength_b, uint8_t rounds, rc6_ctx_t *s){
+ 	uint8_t i,j;
+ 	uint16_t v,p,c;
+ 	uint32_t a,b, l=0;
+ 	if (rounds>125)
+ 		return 2;
+ 	if(!(s->S=malloc((2*rounds+4)*sizeof(uint32_t))))
+ 		return 1;
+ 	
+ 	s->rounds=rounds;
+ 	
+ 	c = keylength_b/32;
+ 	if (keylength_b%32){ 
+ 		++c;
+ 		j=(keylength_b%32)/8;
+ 		if(keylength_b%8)
+ 			++j;
+ 		for (i=0; i<j; ++i) 
+ 			((uint8_t*)&l)[i] = ((uint8_t*)key)[(c-1)*4 + i];
+ 	} else {
+ 		l = ((uint32_t*)key)[c-1];
+ 	}
+ 	
+ 	s->S[0] = P32;
+ 	for(i=1; i<2*rounds+4; ++i){
+		s->S[i] = s->S[i-1] + Q32;
+	}
+	
+	a=b=j=i=0;
+	v = 3 * ((c > 2*rounds+4)?c:(2*rounds+4));
+	for(p=1; p<=v; ++p){
+		a = s->S[i] = rotl32(s->S[i] + a + b, 3);
+		if (j==c-1){
+			b = l = rotl32(l+a+b, a+b);
+		} else {
+			b = ((uint32_t*)key)[j] = rotl32(((uint32_t*)key)[j]+a+b, a+b);
+		}
+		i = (i+1) % (2*rounds+4);
+		j = (j+1) % c;
+	}
+	return 0;
+}
+ 
+void rc6_free(rc6_ctx_t *s){
+ 	free(s->S);
+} 
+ 
+#define LG_W 5
+#define A (((uint32_t*)block)[0])
+#define B (((uint32_t*)block)[1])
+#define C (((uint32_t*)block)[2])
+#define D (((uint32_t*)block)[3])
+ 
+void rc6_enc(void* block, rc6_ctx_t *s){
+ 	uint8_t i;
+ 	uint32_t t,u,x; /* greetings to Linux? */
+ 	B += s->S[0];
+ 	D += s->S[1];
+ 	for (i=1; i<=s->rounds; ++i){
+ 		t = rotl32(B * (2*B+1), LG_W);
+ 		u = rotl32(D * (2*D+1), LG_W);
+ 		A = rotl32((A ^ t), u) + s->S[2*i];
+ 		C = rotl32((C ^ u), t) + s->S[2*i+1];
+ 		x = A;
+ 		A = B;
+ 		B = C;
+ 		C = D;
+ 		D = x;
+ 	}
+ 	A += s->S[2*s->rounds+2];
+ 	C += s->S[2*s->rounds+3];
+}
+ 
+void rc6_dec(void* block, rc6_ctx_t *s){
+ 	uint8_t i;
+ 	uint32_t t,u,x; /* greetings to Linux? */
+ 
+ 	C -= s->S[2*s->rounds+3];
+ 	A -= s->S[2*s->rounds+2];
+ 	
+ 	for (i=s->rounds; i>0; --i){
+ 		x=D;
+ 		D=C;
+ 		C=B;
+ 		B=A;
+ 		A=x;
+ 		u = rotl32(D * (2*D+1), LG_W);
+ 		t = rotl32(B * (2*B+1), LG_W);
+ 		C = rotr32(C - s->S[2*i+1], t) ^ u;
+ 		A = rotr32(A - s->S[2*i+0], u) ^ t;
+ 	}
+ 	D -= s->S[1];
+ 	B -= s->S[0];
+}
+ 
diff --git a/rc6/rc6.h b/rc6/rc6.h
new file mode 100644
index 0000000..429a1d3
--- /dev/null
+++ b/rc6/rc6.h
@@ -0,0 +1,52 @@
+/* rc6.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* 
+ * File:	rc6.h
+ * Author:	Daniel Otte
+ * Date: 	06.08.2006
+ * License: GPL
+ * Description: Implementation of the RC6 cipher algorithm.
+ * 	This implementation is restricted to 32-bit words, but free in the choice of number of rounds (0 to 125).
+ * 	so it is RC6-32/r/b
+ */
+
+#ifndef RC6_H_
+#define RC6_H_
+
+ 
+#include <stdint.h>
+ 
+typedef struct rc6_ctx_st{
+	uint8_t		rounds;		/* specifys the number of rounds; default: 20 */
+ 	uint32_t*	S;			/* the round-keys */
+} rc6_ctx_t;
+ 
+ 
+uint8_t rc6_init(void* key, uint16_t keylength_b, rc6_ctx_t *s);
+ 
+uint8_t rc6_initl(void* key, uint16_t keylength_b, uint8_t rounds, rc6_ctx_t *s);
+ 
+void rc6_enc(void* block, rc6_ctx_t *s);
+
+void rc6_dec(void* block, rc6_ctx_t *s);
+ 
+void rc6_free(rc6_ctx_t *s); 
+
+#endif /* RC6_H_ */
+
diff --git a/seed-asm.S b/seed-asm.S
deleted file mode 100644
index 84866c3..0000000
--- a/seed-asm.S
+++ /dev/null
@@ -1,957 +0,0 @@
-/* seed-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	seed-asm.S
- * \author	Daniel Otte 
- * \date	2007-06-1
- * \brief	SEED parts in assembler for AVR
- * \par License	
- * GPLv3 or later
- * 
- */
-#include "avr-asm-macros.S"
-
-/******************************************************************************/	
-/*
-#define M0 0xfc
-#define M1 0xf3
-#define M2 0xcf
-#define M3 0x3f
-
-#define X3 (((uint8_t*)(&x))[0])
-#define X2 (((uint8_t*)(&x))[1])
-#define X1 (((uint8_t*)(&x))[2])
-#define X0 (((uint8_t*)(&x))[3])
-
-#define Z3 (((uint8_t*)(&z))[0])
-#define Z2 (((uint8_t*)(&z))[1])
-#define Z1 (((uint8_t*)(&z))[2])
-#define Z0 (((uint8_t*)(&z))[3])
-
-uint32_t g_function(uint32_t x){
-	uint32_t z;
-	/ * sbox substitution * /
-	X3 = pgm_read_byte(&(seed_sbox2[X3]));
-	X2 = pgm_read_byte(&(seed_sbox1[X2]));
-	X1 = pgm_read_byte(&(seed_sbox2[X1]));
-	X0 = pgm_read_byte(&(seed_sbox1[X0]));
-	/ * now the permutation * /
-	Z0 = (X0 & M0) ^ (X1 & M1) ^ (X2 & M2) ^ (X3 & M3);
-	Z1 = (X0 & M1) ^ (X1 & M2) ^ (X2 & M3) ^ (X3 & M0);
-	Z2 = (X0 & M2) ^ (X1 & M3) ^ (X2 & M0) ^ (X3 & M1);
-	Z3 = (X0 & M3) ^ (X1 & M0) ^ (X2 & M1) ^ (X3 & M2);
-	return z;
-}
-*/
-M0 = 0xfc
-M1 = 0xf3
-M2 = 0xcf
-M3 = 0x3f
-X0 = 18
-X1 = 19
-X2 = 20
-X3 = 21
-Z0 = 25
-Z1 = 24
-Z2 = 23
-Z3 = 22
-T0 = X0
-T1 = 26
-T2 = 27
-T3 = X1
-/*
- *  param x: r22:r25
- *  X0 = R25
- *  X1 = R24
- *  X2 = R23
- *  X3 = R22
- */    
-seed_g_function:
-	ldi r30, lo8(seed_sbox1)
-	ldi r31, hi8(seed_sbox1)
- 	movw r26, r30
-	add r30, Z2
-	adc r31, r1
-	lpm X2, Z
-	movw r30, r26
-	add r30, Z0
-	adc r31, r1
-	lpm X0, Z
-	inc r27 /* switch X to point to sbox2 */
-	movw r30, r26
-	add r30, Z3
-	adc r31, r1
-	lpm X3, Z
-	movw r30, r26
-	add r30, Z1
-	adc r31, r1
-	lpm X1, Z
-	/* now the secound part */
-	mov Z0, X0
-	mov Z1, X0
-	mov Z2, X0
-	mov Z3, X0
-	andi Z0, M0
-	andi Z1, M1
-	andi Z2, M2
-	andi Z3, M3	
-	mov T0, X1
-	mov T1, X1
-	mov T2, X1
-	; mov T3, X1 /* T3 = X1 */
-	andi T0, M1
-	andi T1, M2
-	andi T2, M3
-	andi T3, M0
-	eor Z0, T0
-	eor Z1, T1
-	eor Z2, T2
-	eor Z3, T3
-	mov T0, X2
-	mov T1, X2
-	mov T2, X2
-	mov T3, X2
-	andi T0, M2
-	andi T1, M3
-	andi T2, M0
-	andi T3, M1
-	eor Z0, T0
-	eor Z1, T1
-	eor Z2, T2
-	eor Z3, T3
-	mov T0, X3
-	mov T1, X3
-	mov T2, X3
-	mov T3, X3
-	andi T0, M3
-	andi T1, M0
-	andi T2, M1
-	andi T3, M2
-	eor Z0, T0
-	eor Z1, T1
-	eor Z2, T2
-	eor Z3, T3
-	ret
-
-seed_sbox1:
-.byte   169,  133,  214,  211,   84,   29,  172,   37 
-.byte    93,   67,   24,   30,   81,  252,  202,   99 
-.byte    40,   68,   32,  157,  224,  226,  200,   23 
-.byte   165,  143,    3,  123,  187,   19,  210,  238 
-.byte   112,  140,   63,  168,   50,  221,  246,  116 
-.byte   236,  149,   11,   87,   92,   91,  189,    1 
-.byte    36,   28,  115,  152,   16,  204,  242,  217 
-.byte    44,   231, 114,  131,  155,  209,  134,  201 
-.byte    96,   80,  163,  235,   13,  182,  158,   79 
-.byte   183,   90,  198,  120,  166,   18,  175,  213 
-.byte    97,  195,  180,   65,   82,  125,  141,    8 
-.byte    31,  153,    0,   25,    4,   83,  247,  225 
-.byte   253,  118,   47,   39,  176,  139,   14,  171 
-.byte   162,  110,  147,   77,  105,  124,    9,   10 
-.byte   191,  239,  243,  197,  135,   20,  254,  100 
-.byte   222,   46,   75,   26,    6,   33,  107,  102 
-.byte     2,  245,  146,  138,   12,  179,  126,  208 
-.byte   122,   71,  150,  229,   38,  128,  173,  223 
-.byte   161,   48,   55,  174,   54,   21,   34,   56 
-.byte   244,  167,   69,   76,  129,  233,  132,  151 
-.byte    53,  203,  206,   60,  113,   17,  199,  137 
-.byte   117,  251,  218,  248,  148,   89,  130,  196 
-.byte   255,   73,   57,  103,  192,  207,  215,  184 
-.byte    15,  142,   66,   35,  145,  108,  219,  164 
-.byte    52,  241,   72,  194,  111,   61,   45,   64 
-.byte   190,   62,  188,  193,  170,  186,   78,   85 
-.byte    59,  220,  104,  127,  156,  216,   74,   86 
-.byte   119,  160,  237,   70,  181,   43,  101,  250 
-.byte   227,  185,  177,  159,   94,  249,  230,  178 
-.byte    49,  234,  109,   95,  228,  240,  205,  136 
-.byte    22,   58,   88,  212,   98,   41,    7,   51 
-.byte   232,   27,    5,  121,  144,  106,   42,  154
-
-
-seed_sbox2:
-.byte    56,  232,   45,  166,  207,  222,  179,  184 
-.byte   175,   96,   85,  199,   68,  111,  107,   91 
-.byte   195,   98,   51,  181,   41,  160,  226,  167 
-.byte   211,  145,   17,    6,   28,  188,   54,   75 
-.byte   239,  136,  108,  168,   23,  196,   22,  244 
-.byte   194,   69,  225,  214,   63,   61,  142,  152 
-.byte    40,   78,  246,   62,  165,  249,   13,  223 
-.byte   216,   43,  102,  122,   39,   47,  241,  114 
-.byte    66,  212,   65,  192,  115,  103,  172,  139 
-.byte   247,  173,  128,   31,  202,   44,  170,   52 
-.byte   210,   11,  238,  233,   93,  148,   24,  248 
-.byte    87,  174,    8,  197,   19,  205,  134,  185 
-.byte   255,  125,  193,   49,  245,  138,  106,  177 
-.byte   209,   32,  215,    2,   34,    4,  104,  113 
-.byte     7,  219,  157,  153,   97,  190,  230,   89 
-.byte   221,   81,  144,  220,  154,  163,  171,  208 
-.byte   129,   15,   71,   26,  227,  236,  141,  191 
-.byte   150,  123,   92,  162,  161,   99,   35,   77 
-.byte   200,  158,  156,   58,   12,   46,  186,  110 
-.byte   159,   90,  242,  146,  243,   73,  120,  204 
-.byte    21,  251,  112,  117,  127,   53,   16,    3 
-.byte   100,  109,  198,  116,  213,  180,  234,    9 
-.byte   118,   25,  254,   64,   18,  224,  189,    5 
-.byte   250,    1,  240,   42,   94,  169,   86,   67 
-.byte   133,   20,  137,  155,  176,  229,   72,  121 
-.byte   151,  252,   30,  130,   33,  140,   27,   95 
-.byte   119,   84,  178,   29,   37,   79,    0,   70 
-.byte   237,   88,   82,  235,  126,  218,  201,  253 
-.byte    48,  149,  101,   60,  182,  228,  187,  124 
-.byte    14,   80,   57,   38,   50,  132,  105,  147 
-.byte    55,  231,   36,  164,  203,   83,   10,  135 
-.byte   217,   76,  131,  143,  206,   59,   74,  183 
-
-/******************************************************************************/
-
-/*
-static
-uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){
-	uint32_t c,d;
-
-	c = *a & 0x00000000FFFFFFFFLL;
-	d = (*a>>32) & 0x00000000FFFFFFFFLL;
-	
-	c ^= k0; d ^= k1;
-	d ^= c;
-	d = g_function(d);
-	c = bigendian_sum32(c,d);
-	c = g_function(c);
-	d = bigendian_sum32(c,d);
-	d = g_function(d);
-	c = bigendian_sum32(c,d);	
-	return ((uint64_t)d << 32) | c;
-}
-*/
-/*
- * param a   r24:r25
- * param k0  r20:r23
- * param k1  r16:r19
- */
-D0 = 10
-D1 = 11
-C0 = 12
-C1 = 13
-C2 = 14
-C3 = 15
-D2 = 16
-D3 = 17
-seed_f_function:
-	push_range 10, 17
-	movw r30, r24
-	ld C0, Z+
-	ld C1, Z+
-	ld C2, Z+
-	ld C3, Z+
-	eor C0, r20
-	eor C1, r21
-	eor C2, r22
-	eor C3, r23
-	ld r22, Z+
-	ld r23, Z+
-	ld r24, Z+
-	ld r25, Z+
-	eor r22, r16
-	eor r23, r17
-	eor r24, r18
-	eor r25, r19
-	eor r22, C0
-	eor r23, C1
-	eor r24, C2
-	eor r25, C3
-	rcall seed_g_function
-	mov D0, r22
-	mov D1, r23
-	mov D2, r24
-	mov D3, r25
-
-	add r25, C3
-	adc r24, C2
-	adc r23, C1
-	adc r22, C0
-	rcall seed_g_function
-	mov C0, r22
-	mov C1, r23
-	mov C2, r24
-	mov C3, r25
-
-	add r25, D3
-	adc r24, D2
-	adc r23, D1
-	adc r22, D0
-	rcall seed_g_function
-	mov D0, r22
-	mov D1, r23
-	mov D2, r24
-	mov D3, r25
-
-	add C3, r25
-	adc C2, r24
-	adc C1, r23
-	adc C0, r22
-
-	mov r18, C0
-	mov r19, C1
-	mov r20, C2
-	mov r21, C3
-	
-	pop_range 10, 17
-	ret
-
-/******************************************************************************/
-/*
-void seed_init(uint8_t * key, seed_ctx_t * ctx){
-	memcpy(ctx->k, key, 128/8);
-}
-*/
-
-.global seed_init
-seed_init:
-	movw r26, r24
-	movw r30, r22
-	ldi r22, 16
-1:
-	ld r0, X+
-	st Z+, r0
-	dec r22
-	brne 1b	
-	ret
-/******************************************************************************/
-/*
-typedef struct {
-	uint32_t k0, k1;
-} keypair_t;
-
-keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){
-	keypair_t ret;
-	if (curround>15){
-		/ * ERROR * /
-		ret.k0 = ret.k1 = 0;
-	} else {
-	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
-		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
-		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
-		ret.k0 = seed_g_function(ret.k0);
-		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
-		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = seed_g_function(ret.k1);
-		
-		if (curround & 1){
-			/ * odd round (1,3,5, ...) * /
-			((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
-		} else {
-			/ * even round (0,2,4, ...) * /
-			((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
-		}
-	}
-	return ret;
-}
-*/
-/*
- * param keystate: r24:r25
- * param curround: r22
- */
-XRC0 = 10
-XRC1 = 11
-XRC2 = 12
-XRC3 = 13
-D0 = 14
-D1 = 15
-D2 = 16
-D3 = 17
-
-compute_keys:
-	ldi r30, lo8(seed_kc)
-	ldi r31, hi8(seed_kc)
-	lsl r22
-	lsl r22
-	add r30, r22
-	adc r31, r1
-	lpm XRC0, Z+
-	lpm XRC1, Z+
-	lpm XRC2, Z+
-	lpm XRC3, Z+
-	movw r28, r24
-	ldd r25, Y+0*4+3
-	ldd r24, Y+0*4+2
-	ldd r23, Y+0*4+1
-	ldd r22, Y+0*4+0
-	
-	ldd r0, Y+2*4+3
-	add r25, r0
-	ldd r0, Y+2*4+2
-	adc r24, r0
-	ldd r0, Y+2*4+1
-	adc r23, r0
-	ldd r0, Y+2*4+0
-	adc r22, r0
-
-	sub r25, XRC3
-	sbc r24, XRC2
-	sbc r23, XRC1
-	sbc r22, XRC0
-	rcall seed_g_function
-	mov D0, r22
-	mov D1, r23
-	mov D2, r24
-	mov D3, r25
-	
-
-	ldd r25, Y+1*4+3
-	ldd r24, Y+1*4+2
-	ldd r23, Y+1*4+1
-	ldd r22, Y+1*4+0
-
-	ldd r0, Y+3*4+3
-	sub r25, r0
-	ldd r0, Y+3*4+2
-	sbc r24, r0
-	ldd r0, Y+3*4+1
-	sbc r23, r0
-	ldd r0, Y+3*4+0
-	sbc r22, r0
-
-	add r25, XRC3
-	adc r24, XRC2
-	adc r23, XRC1
-	adc r22, XRC0
-	rcall seed_g_function
-
-	mov r21, D3
-	mov r20, D2
-	mov r19, D1
-	mov r18, D0 
-	ret
-
-seed_getnextkeys:
-	push_range 10, 17
-	push r28
-	push r29
-;	andi r22, 0x0F
-	bst r22,0
-	rcall compute_keys		
-	brtc even_round
-odd_round:
-
-	adiw r28, 8
-	ld r26, Y
-	ldd r0, Y+1
-	std Y+0, r0
-	ldd r0, Y+2
-	std Y+1, r0
-	ldd r0, Y+3
-	std Y+2, r0
-	ldd r0, Y+4
-	std Y+3, r0
-	ldd r0, Y+5
-	std Y+4, r0
-	ldd r0, Y+6
-	std Y+5, r0
-	ldd r0, Y+7
-	std Y+6, r0
-	std Y+7, r26	
-/*
-	movw r30, r28
-	ld r26, Z+
-	ldi r27, 7
-1:
-	ld r0, Z+
-	st Y+, r0
-	dec r27
-	brne 1b
-	st Y, r26
-*/	
-	rjmp 4f
-
-even_round:
-
-	ldd r26, Y+7
-	ldd r0, Y+6
-	std Y+7, r0
-	ldd r0, Y+5
-	std Y+6, r0
-	ldd r0, Y+4
-	std Y+5, r0
-	ldd r0, Y+3
-	std Y+4, r0
-	ldd r0, Y+2
-	std Y+3, r0
-	ldd r0, Y+1
-	std Y+2, r0
-	ldd r0, Y+0
-	std Y+1, r0
-	std Y+0, r26
-/*
-	adiw r28, 7	
-	ld r26, Y
-	ldi r27, 7	
-1:
-	ld r0, -Y
-	std Y+1, r0
-	dec r27
-	brne 1b
-	st Y, r26
-*/
-4:	
-	pop r29
-	pop r28
-	pop_range 10, 17
-	ret
-
-/******************************************************************************/
-/*
-keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
-	keypair_t ret;
-	if (curround>15){
-		/ * ERROR * /
-		ret.k0 = ret.k1 = 0;
-	} else {
-		if (curround & 1){
-			/ * odd round (1,3,5, ..., 15) * /
-			((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
-		} else {
-			/ * even round (0,2,4, ..., 14) * /
-			((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
-		}
-	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
-		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
-		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
-		ret.k0 = seed_g_function(ret.k0);
-		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
-		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = seed_g_function(ret.k1);
-		}
-	return ret;
-}
-*/
-/*
- * param keystate: r24:r25
- * param curround: r22
- */
-
-seed_getprevkeys:
-	push_range 10, 17
-	push r28
-	push r29
-	movw r28, r24	
-;	andi r22, 0x0F
-	bst r22, 0
-	brts r_odd_round
-r_even_round:
-	ldd r26, Y+0
-	ldd r0, Y+1
-	std Y+0, r0
-	ldd r0, Y+2
-	std Y+1, r0
-	ldd r0, Y+3
-	std Y+2, r0
-	ldd r0, Y+4
-	std Y+3, r0
-	ldd r0, Y+5
-	std Y+4, r0
-	ldd r0, Y+6
-	std Y+5, r0
-	ldd r0, Y+7
-	std Y+6, r0
-	std Y+7, r26	
-/*
-	movw r30, r28
-	ld r26, Z+
-	ldi r27, 7
-1:
-	ld r0, Z+
-	st Y+, r0
-	dec r27
-	brne 1b
-	st Y, r26
-*/	
-
-	rjmp 4f
-r_odd_round:
-	ldd r26, Y+8+7
-	ldd r0, Y+8+6
-	std Y+8+7, r0
-	ldd r0, Y+8+5
-	std Y+8+6, r0
-	ldd r0, Y+8+4
-	std Y+8+5, r0
-	ldd r0, Y+8+3
-	std Y+8+4, r0
-	ldd r0, Y+8+2
-	std Y+8+3, r0
-	ldd r0, Y+8+1
-	std Y+8+2, r0
-	ldd r0, Y+8+0
-	std Y+8+1, r0
-	std Y+8+0, r26
-/*
-	adiw r28, 7	
-	ld r26, Y
-	ldi r27, 7	
-1:
-	ld r0, -Y
-	std Y+1, r0
-	dec r27
-	brne 1b
-	st Y, r26
-*/
-4:
-	rcall compute_keys	
-
-	pop r29
-	pop r28
-	pop_range 10, 17
-	ret
-
-/******************************************************************************/
-
-seed_kc:
-.long   0xb979379e 
-.long   0x73f36e3c
-.long   0xe6e6dd78 
-.long   0xcccdbbf1 
-.long   0x999b77e3 
-.long   0x3337efc6 
-.long   0x676ede8d 
-.long   0xcfdcbc1b 
-.long   0x9eb97937
-.long   0x3c73f36e	
-.long   0x78e6e6dd
-.long   0xf1cccdbb
-.long   0xe3999b77
-.long   0xc63337ef
-.long   0x8d676ede
-.long   0x1bcfdcbc
-
-/******************************************************************************/
-/*
-#define L (((uint64_t*)buffer)[0])
-#define R (((uint64_t*)buffer)[1])
-
-void seed_enc(void * buffer, seed_ctx_t * ctx){
-	uint8_t r;
-	keypair_t k;
-	for(r=0; r<8; ++r){
-			k = seed_getnextkeys(ctx->k, 2*r);
-/ *
-	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+0, 8);
-	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+8, 8);
-* /
-			L ^= seed_f_function(&R,k.k0,k.k1);
-			
-			k = seed_getnextkeys(ctx->k, 2*r+1);
-/ *
-	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+8, 8);
-	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+0, 8);
-* /
-			R ^= seed_f_function(&L,k.k0,k.k1);
-	}
-	/ * just an exchange without temp. variable * /
-	L ^= R;
-	R ^= L;
-	L ^= R;
-}
-*/
-/*
- * param buffer: r24:r25
- * param ctx:    r22:r23
- */
-CTR  = 9
-xLPTR = 10
-xRPTR = 12
-CPTR = 14
-
-.global seed_enc
-seed_enc:
-	push_range 9, 17
-	push r28
-	push r29	
-	clr CTR
-	movw xLPTR, r24
-	adiw r24, 8
-	movw xRPTR, r24		
-	movw CPTR, r22
-1:
-	movw r28, xLPTR
-	movw r24, CPTR
-	mov r22, CTR
-	lsl r22
-	rcall seed_getnextkeys	
-
-	/* use pen & paper to understand the following permutation */
-	movw r16, r22
-	movw r22, r18
-	movw r18, r24
-	movw r24, r20
-	movw r20, r22
-	movw r22, r24
-	movw r24, xRPTR	
-
-	rcall seed_f_function
-
-	ld r0, Y
-	eor r0, r18
-	st Y+, r0
-	ld r0, Y
-	eor r0, r19
-	st Y+, r0
-	ld r0, Y
-	eor r0, r20
-	st Y+, r0
-	ld r0, Y
-	eor r0, r21
-	st Y+, r0
-	ld r0, Y
-	eor r0, r22
-	st Y+, r0
-	ld r0, Y
-	eor r0, r23
-	st Y+, r0
-	ld r0, Y
-	eor r0, r24
-	st Y+, r0
-	ld r0, Y
-	eor r0, r25
-	st Y+, r0
-	/* secound half */
-	movw r24, CPTR
-	mov r22, CTR
-	lsl r22
-	inc r22
-	rcall seed_getnextkeys	
-
-	movw r16, r22
-	movw r22, r18
-	movw r18, r24
-	movw r24, r20
-	movw r20, r22
-	movw r22, r24
-	movw r24, xLPTR	
-	
-	rcall seed_f_function
-
-	ld r0, Y
-	eor r0, r18
-	st Y+, r0
-	ld r0, Y
-	eor r0, r19
-	st Y+, r0
-	ld r0, Y
-	eor r0, r20
-	st Y+, r0
-	ld r0, Y
-	eor r0, r21
-	st Y+, r0
-	ld r0, Y
-	eor r0, r22
-	st Y+, r0
-	ld r0, Y
-	eor r0, r23
-	st Y+, r0
-	ld r0, Y
-	eor r0, r24
-	st Y+, r0
-	ld r0, Y
-	eor r0, r25
-	st Y+, r0
-	
-	inc CTR
-	bst CTR, 3
-	brts 3f
-	rjmp 1b
-3:
-	movw r28, xLPTR
-	movw r30, xRPTR
-	ldi r17, 8
-4:
-	ld r10, Y
-	ld r11, Z
-	st Z+, r10
-	st Y+, r11
-	dec r17
-	brne 4b
-5:
-	pop r29
-	pop r28
-	pop_range 9, 17
-	ret
-
-/******************************************************************************/
-/*
-#define L (((uint64_t*)buffer)[0])
-#define R (((uint64_t*)buffer)[1])
-
-void seed_dec(void * buffer, seed_ctx_t * ctx){
-	int8_t r;
-	keypair_t k;
-	for(r=7; r>=0; --r){
-			k = seed_getprevkeys(ctx->k, 2*r+1);
-/ *
-	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
-	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
-* /
-			L ^= seed_f_function(&R,k.k0,k.k1);
-			
-			k = seed_getprevkeys(ctx->k, 2*r+0);
-/ *
-	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
-	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
-* /
-			R ^= seed_f_function(&L,k.k0,k.k1);
-	}
-	/ * just an exchange without temp. variable * /
-	L ^= R;
-	R ^= L;
-	L ^= R;
-}
-*/
-/*
- * param buffer: r24:r25
- * param ctx:    r22:r23
- */
-CTR  = 9
-xLPTR = 10
-xRPTR = 12
-CPTR = 14
-
-.global seed_dec
-seed_dec:
-	push_range 9, 17
-	push r28
-	push r29	
-	ldi r16, 7
-	mov CTR, r16
-	movw xLPTR, r24
-	adiw r24, 8
-	movw xRPTR, r24		
-	movw CPTR, r22
-1:
-	movw r28, xLPTR
-	movw r24, CPTR
-	mov r22, CTR
-	lsl r22
-	inc r22
-	rcall seed_getprevkeys	
-
-	/* use pen & paper to understand the following permutation */
-	movw r16, r22
-	movw r22, r18
-	movw r18, r24
-	movw r24, r20
-	movw r20, r22
-	movw r22, r24
-	movw r24, xRPTR	
-
-	rcall seed_f_function
-
-	ld r0, Y
-	eor r0, r18
-	st Y+, r0
-	ld r0, Y
-	eor r0, r19
-	st Y+, r0
-	ld r0, Y
-	eor r0, r20
-	st Y+, r0
-	ld r0, Y
-	eor r0, r21
-	st Y+, r0
-	ld r0, Y
-	eor r0, r22
-	st Y+, r0
-	ld r0, Y
-	eor r0, r23
-	st Y+, r0
-	ld r0, Y
-	eor r0, r24
-	st Y+, r0
-	ld r0, Y
-	eor r0, r25
-	st Y+, r0
-	/* secound half */
-	movw r24, CPTR
-	mov r22, CTR
-	lsl r22
-	rcall seed_getprevkeys	
-
-	movw r16, r22
-	movw r22, r18
-	movw r18, r24
-	movw r24, r20
-	movw r20, r22
-	movw r22, r24
-	movw r24, xLPTR	
-	
-	rcall seed_f_function
-
-	ld r0, Y
-	eor r0, r18
-	st Y+, r0
-	ld r0, Y
-	eor r0, r19
-	st Y+, r0
-	ld r0, Y
-	eor r0, r20
-	st Y+, r0
-	ld r0, Y
-	eor r0, r21
-	st Y+, r0
-	ld r0, Y
-	eor r0, r22
-	st Y+, r0
-	ld r0, Y
-	eor r0, r23
-	st Y+, r0
-	ld r0, Y
-	eor r0, r24
-	st Y+, r0
-	ld r0, Y
-	eor r0, r25
-	st Y+, r0
-	
-	dec CTR
-	brmi 3f
-	rjmp 1b
-3:
-	movw r28, xLPTR
-	movw r30, xRPTR
-	ldi r17, 8
-4:
-	ld r10, Y
-	ld r11, Z
-	st Z+, r10
-	st Y+, r11
-	dec r17
-	brne 4b
-5:
-	pop r29
-	pop r28
-	pop_range 9, 17
-	ret
-
diff --git a/seed.h b/seed.h
deleted file mode 100644
index 78d7d59..0000000
--- a/seed.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* seed.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	seed.h
- * \author	Daniel Otte 
- * \date	2007-06-1
- * \brief 	declarations for seed
- * \par License	
- * GPL
- * 
- */
-#ifndef SEED_H_
-#define SEED_H_
-
-#include <stdint.h>
-/** \typedef seed_ctx_t
- * \brief SEED context
- * 
- * A variable of this type may hold the key material for the SEED cipher. 
- * This context is regulary generated by the 
- * void seed_init(const void * key, seed_ctx_t * ctx) function.
- */
-typedef struct{
-	uint32_t k[4];
-} seed_ctx_t;
-
-/******************************************************************************/
-
-/** \fn void seed_init(const void * key, seed_ctx_t * ctx)
- * \brief initializes context for SEED operation
- * 
- * This function copys the key material into a context variable.
- * 
- * \param key  pointer to the key material (128 bit = 16 bytes)
- * \param ctx  pointer to the context (seed_ctx_t)
- */
-void seed_init(const void * key, seed_ctx_t * ctx);
-
-/** \fn void seed_enc(void * buffer,const seed_ctx_t * ctx)
- * \brief encrypt a block with SEED
- * 
- * This function encrypts a block of 64 bits (8 bytes) with the SEED algorithm.
- * The round keys are computed on demand, so the context is modifyed while
- * encrypting but the original stated is restored when the function exits.
- * 
- * \param buffer pointer to the block (64 bit = 8 byte) which will be encrypted
- * \param ctx    pointer to the key material (seed_ctx_t)
- */
-void seed_enc(void * buffer, const seed_ctx_t * ctx);
-
-
-/** \fn void seed_dec(void * buffer, const seed_ctx_t * ctx)
- * \brief decrypt a block with SEED
- * 
- * This function decrypts a block of 64 bits (8 bytes) with the SEED algorithm.
- * The round keys are computed on demand, so the context is modifyed while
- * decrypting but the original stated is restored when the function exits.
- * 
- * \param buffer pointer to the block (64 bit = 8 byte) which will be decrypted
- * \param ctx    pointer to the key material (seed_ctx_t)
- */
-void seed_dec(void * buffer, const seed_ctx_t * ctx);
-
-	
-#endif /*SEED_H_*/
diff --git a/seed/seed-asm.S b/seed/seed-asm.S
new file mode 100644
index 0000000..84866c3
--- /dev/null
+++ b/seed/seed-asm.S
@@ -0,0 +1,957 @@
+/* seed-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	seed-asm.S
+ * \author	Daniel Otte 
+ * \date	2007-06-1
+ * \brief	SEED parts in assembler for AVR
+ * \par License	
+ * GPLv3 or later
+ * 
+ */
+#include "avr-asm-macros.S"
+
+/******************************************************************************/	
+/*
+#define M0 0xfc
+#define M1 0xf3
+#define M2 0xcf
+#define M3 0x3f
+
+#define X3 (((uint8_t*)(&x))[0])
+#define X2 (((uint8_t*)(&x))[1])
+#define X1 (((uint8_t*)(&x))[2])
+#define X0 (((uint8_t*)(&x))[3])
+
+#define Z3 (((uint8_t*)(&z))[0])
+#define Z2 (((uint8_t*)(&z))[1])
+#define Z1 (((uint8_t*)(&z))[2])
+#define Z0 (((uint8_t*)(&z))[3])
+
+uint32_t g_function(uint32_t x){
+	uint32_t z;
+	/ * sbox substitution * /
+	X3 = pgm_read_byte(&(seed_sbox2[X3]));
+	X2 = pgm_read_byte(&(seed_sbox1[X2]));
+	X1 = pgm_read_byte(&(seed_sbox2[X1]));
+	X0 = pgm_read_byte(&(seed_sbox1[X0]));
+	/ * now the permutation * /
+	Z0 = (X0 & M0) ^ (X1 & M1) ^ (X2 & M2) ^ (X3 & M3);
+	Z1 = (X0 & M1) ^ (X1 & M2) ^ (X2 & M3) ^ (X3 & M0);
+	Z2 = (X0 & M2) ^ (X1 & M3) ^ (X2 & M0) ^ (X3 & M1);
+	Z3 = (X0 & M3) ^ (X1 & M0) ^ (X2 & M1) ^ (X3 & M2);
+	return z;
+}
+*/
+M0 = 0xfc   /* M0..M3: byte masks used by the permutation step */
+M1 = 0xf3
+M2 = 0xcf
+M3 = 0x3f
+X0 = 18     /* X0..X3 (r18..r21): receive the S-box outputs */
+X1 = 19
+X2 = 20
+X3 = 21
+Z0 = 25     /* Z0..Z3 (r25..r22): argument bytes on entry, result bytes on return */
+Z1 = 24
+Z2 = 23
+Z3 = 22
+T0 = X0     /* T0..T3: scratch for the masked terms; T0 shares r18 with X0 */
+T1 = 26
+T2 = 27
+T3 = X1     /* T3 shares r19 with X1 */
+/*
+ *  seed_g_function: G function of the C reference above; x in r22:r25, result in r22:r25.
+ *  Byte "X0" of the C reference arrives in r25 (sbox1), "X1" in r24 (sbox2),
+ *  "X2" in r23 (sbox1) and "X3" in r22 (sbox2).
+ *  The adc instructions add only the carry and assume r1 == 0 (avr-gcc zero register).
+ *  Clobbers r18-r21, r26, r27, r30, r31.
+ */
+seed_g_function:
+	ldi r30, lo8(seed_sbox1)
+	ldi r31, hi8(seed_sbox1)
+ 	movw r26, r30          /* keep the table base in X (r26:r27) */
+	add r30, Z2
+	adc r31, r1
+	lpm X2, Z              /* X2 = seed_sbox1[r23] */
+	movw r30, r26
+	add r30, Z0
+	adc r31, r1
+	lpm X0, Z              /* X0 = seed_sbox1[r25] */
+	inc r27 /* base += 256: X now points to seed_sbox2, which directly follows the 256-byte seed_sbox1 */
+	movw r30, r26
+	add r30, Z3
+	adc r31, r1
+	lpm X3, Z              /* X3 = seed_sbox2[r22] */
+	movw r30, r26
+	add r30, Z1
+	adc r31, r1
+	lpm X1, Z              /* X1 = seed_sbox2[r24] */
+	/* now the second part: each Zi becomes the XOR of the four masked S-box outputs */
+	mov Z0, X0
+	mov Z1, X0
+	mov Z2, X0
+	mov Z3, X0
+	andi Z0, M0
+	andi Z1, M1
+	andi Z2, M2
+	andi Z3, M3	
+	mov T0, X1             /* X0 has been consumed, so r18 is reused as T0 from here on */
+	mov T1, X1
+	mov T2, X1
+	; mov T3, X1 /* not needed: T3 and X1 are the same register (r19) */
+	andi T0, M1
+	andi T1, M2
+	andi T2, M3
+	andi T3, M0            /* masks X1 in place; its copies were taken above */
+	eor Z0, T0
+	eor Z1, T1
+	eor Z2, T2
+	eor Z3, T3
+	mov T0, X2
+	mov T1, X2
+	mov T2, X2
+	mov T3, X2
+	andi T0, M2
+	andi T1, M3
+	andi T2, M0
+	andi T3, M1
+	eor Z0, T0
+	eor Z1, T1
+	eor Z2, T2
+	eor Z3, T3
+	mov T0, X3
+	mov T1, X3
+	mov T2, X3
+	mov T3, X3
+	andi T0, M3
+	andi T1, M0
+	andi T2, M1
+	andi T3, M2
+	eor Z0, T0
+	eor Z1, T1
+	eor Z2, T2
+	eor Z3, T3
+	ret
+
+seed_sbox1:
+.byte   169,  133,  214,  211,   84,   29,  172,   37 
+.byte    93,   67,   24,   30,   81,  252,  202,   99 
+.byte    40,   68,   32,  157,  224,  226,  200,   23 
+.byte   165,  143,    3,  123,  187,   19,  210,  238 
+.byte   112,  140,   63,  168,   50,  221,  246,  116 
+.byte   236,  149,   11,   87,   92,   91,  189,    1 
+.byte    36,   28,  115,  152,   16,  204,  242,  217 
+.byte    44,   231, 114,  131,  155,  209,  134,  201 
+.byte    96,   80,  163,  235,   13,  182,  158,   79 
+.byte   183,   90,  198,  120,  166,   18,  175,  213 
+.byte    97,  195,  180,   65,   82,  125,  141,    8 
+.byte    31,  153,    0,   25,    4,   83,  247,  225 
+.byte   253,  118,   47,   39,  176,  139,   14,  171 
+.byte   162,  110,  147,   77,  105,  124,    9,   10 
+.byte   191,  239,  243,  197,  135,   20,  254,  100 
+.byte   222,   46,   75,   26,    6,   33,  107,  102 
+.byte     2,  245,  146,  138,   12,  179,  126,  208 
+.byte   122,   71,  150,  229,   38,  128,  173,  223 
+.byte   161,   48,   55,  174,   54,   21,   34,   56 
+.byte   244,  167,   69,   76,  129,  233,  132,  151 
+.byte    53,  203,  206,   60,  113,   17,  199,  137 
+.byte   117,  251,  218,  248,  148,   89,  130,  196 
+.byte   255,   73,   57,  103,  192,  207,  215,  184 
+.byte    15,  142,   66,   35,  145,  108,  219,  164 
+.byte    52,  241,   72,  194,  111,   61,   45,   64 
+.byte   190,   62,  188,  193,  170,  186,   78,   85 
+.byte    59,  220,  104,  127,  156,  216,   74,   86 
+.byte   119,  160,  237,   70,  181,   43,  101,  250 
+.byte   227,  185,  177,  159,   94,  249,  230,  178 
+.byte    49,  234,  109,   95,  228,  240,  205,  136 
+.byte    22,   58,   88,  212,   98,   41,    7,   51 
+.byte   232,   27,    5,  121,  144,  106,   42,  154
+
+
+seed_sbox2:
+.byte    56,  232,   45,  166,  207,  222,  179,  184 
+.byte   175,   96,   85,  199,   68,  111,  107,   91 
+.byte   195,   98,   51,  181,   41,  160,  226,  167 
+.byte   211,  145,   17,    6,   28,  188,   54,   75 
+.byte   239,  136,  108,  168,   23,  196,   22,  244 
+.byte   194,   69,  225,  214,   63,   61,  142,  152 
+.byte    40,   78,  246,   62,  165,  249,   13,  223 
+.byte   216,   43,  102,  122,   39,   47,  241,  114 
+.byte    66,  212,   65,  192,  115,  103,  172,  139 
+.byte   247,  173,  128,   31,  202,   44,  170,   52 
+.byte   210,   11,  238,  233,   93,  148,   24,  248 
+.byte    87,  174,    8,  197,   19,  205,  134,  185 
+.byte   255,  125,  193,   49,  245,  138,  106,  177 
+.byte   209,   32,  215,    2,   34,    4,  104,  113 
+.byte     7,  219,  157,  153,   97,  190,  230,   89 
+.byte   221,   81,  144,  220,  154,  163,  171,  208 
+.byte   129,   15,   71,   26,  227,  236,  141,  191 
+.byte   150,  123,   92,  162,  161,   99,   35,   77 
+.byte   200,  158,  156,   58,   12,   46,  186,  110 
+.byte   159,   90,  242,  146,  243,   73,  120,  204 
+.byte    21,  251,  112,  117,  127,   53,   16,    3 
+.byte   100,  109,  198,  116,  213,  180,  234,    9 
+.byte   118,   25,  254,   64,   18,  224,  189,    5 
+.byte   250,    1,  240,   42,   94,  169,   86,   67 
+.byte   133,   20,  137,  155,  176,  229,   72,  121 
+.byte   151,  252,   30,  130,   33,  140,   27,   95 
+.byte   119,   84,  178,   29,   37,   79,    0,   70 
+.byte   237,   88,   82,  235,  126,  218,  201,  253 
+.byte    48,  149,  101,   60,  182,  228,  187,  124 
+.byte    14,   80,   57,   38,   50,  132,  105,  147 
+.byte    55,  231,   36,  164,  203,   83,   10,  135 
+.byte   217,   76,  131,  143,  206,   59,   74,  183 
+
+/******************************************************************************/
+
+/*
+static
+uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){
+	uint32_t c,d;
+
+	c = *a & 0x00000000FFFFFFFFLL;
+	d = (*a>>32) & 0x00000000FFFFFFFFLL;
+	
+	c ^= k0; d ^= k1;
+	d ^= c;
+	d = g_function(d);
+	c = bigendian_sum32(c,d);
+	c = g_function(c);
+	d = bigendian_sum32(c,d);
+	d = g_function(d);
+	c = bigendian_sum32(c,d);	
+	return ((uint64_t)d << 32) | c;
+}
+*/
+/*
+ * param a   r24:r25  pointer to the 8 input bytes; result: final c in r18:r21, final d in r22:r25
+ * param k0  r20:r23  XORed onto bytes 0..3 of *a
+ * param k1  r16:r19  XORed onto bytes 4..7 of *a
+ */
+D0 = 10
+D1 = 11
+C0 = 12
+C1 = 13
+C2 = 14
+C3 = 15
+D2 = 16     /* r16/r17 carry the low half of k1 on entry and are reused once k1 has been XORed in */
+D3 = 17
+seed_f_function:
+	push_range 10, 17      /* macro from avr-asm-macros.S; presumably saves r10..r17 */
+	movw r30, r24          /* Z = a */
+	ld C0, Z+
+	ld C1, Z+
+	ld C2, Z+
+	ld C3, Z+
+	eor C0, r20
+	eor C1, r21
+	eor C2, r22
+	eor C3, r23            /* C = a[0..3] ^ k0 */
+	ld r22, Z+
+	ld r23, Z+
+	ld r24, Z+
+	ld r25, Z+
+	eor r22, r16
+	eor r23, r17
+	eor r24, r18
+	eor r25, r19           /* r22..r25 = a[4..7] ^ k1 */
+	eor r22, C0
+	eor r23, C1
+	eor r24, C2
+	eor r25, C3            /* d ^= c */
+	rcall seed_g_function
+	mov D0, r22
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25            /* D = g(d) */
+
+	add r25, C3            /* 32-bit add done byte-wise; r25 (byte 3) is the least significant byte */
+	adc r24, C2
+	adc r23, C1
+	adc r22, C0
+	rcall seed_g_function
+	mov C0, r22
+	mov C1, r23
+	mov C2, r24
+	mov C3, r25            /* C = g(c + d) */
+
+	add r25, D3
+	adc r24, D2
+	adc r23, D1
+	adc r22, D0
+	rcall seed_g_function
+	mov D0, r22
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25            /* D = g(c + d); r22..r25 keep this value for the return */
+
+	add C3, r25
+	adc C2, r24
+	adc C1, r23
+	adc C0, r22            /* C += D */
+
+	mov r18, C0
+	mov r19, C1
+	mov r20, C2
+	mov r21, C3            /* result: C in r18..r21, D still in r22..r25 */
+	
+	pop_range 10, 17
+	ret
+
+/******************************************************************************/
+/*
+void seed_init(uint8_t * key, seed_ctx_t * ctx){
+	memcpy(ctx->k, key, 128/8);
+}
+*/
+
+.global seed_init         /* seed_init(key in r24:r25, ctx in r22:r23): copies the 16 key bytes into the context */
+seed_init:
+	movw r26, r24          /* X = key (source) */
+	movw r30, r22          /* Z = ctx (destination) */
+	ldi r22, 16            /* r22 is reused as the byte counter */
+1:
+	ld r0, X+
+	st Z+, r0
+	dec r22
+	brne 1b	
+	ret
+/******************************************************************************/
+/*
+typedef struct {
+	uint32_t k0, k1;
+} keypair_t;
+
+keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){
+	keypair_t ret;
+	if (curround>15){
+		/ * ERROR * /
+		ret.k0 = ret.k1 = 0;
+	} else {
+	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = seed_g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(ret.k1);
+		
+		if (curround & 1){
+			/ * odd round (1,3,5, ...) * /
+			((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/ * even round (0,2,4, ...) * /
+			((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
+		}
+	}
+	return ret;
+}
+*/
+/*
+ * param keystate: r24:r25
+ * param curround: r22
+ */
+XRC0 = 10	/* r10..r13: the four bytes of seed_kc[curround] as read from flash */
+XRC1 = 11
+XRC2 = 12
+XRC3 = 13
+D0 = 14	/* r14..r17: keeps the first seed_g_function result (k0) during the second computation */
+D1 = 15
+D2 = 16
+D3 = 17
+
+compute_keys:	/* in: r24:r25 = keystate, r22 = curround; out: r18..r21 = k0, r22..r25 = k1; leaves Y = keystate and overwrites Z, r0, r10..r17 */
+	ldi r30, lo8(seed_kc)
+	ldi r31, hi8(seed_kc)
+	lsl r22
+	lsl r22	/* r22 = curround * 4, the byte offset of the 4-byte table entry */
+	add r30, r22
+	adc r31, r1	/* only the carry is wanted here; assumes r1 holds zero -- TODO confirm */
+	lpm XRC0, Z+
+	lpm XRC1, Z+
+	lpm XRC2, Z+
+	lpm XRC3, Z+
+	movw r28, r24	/* Y = keystate */
+	ldd r25, Y+0*4+3	/* r22..r25 = keystate[0]; r25 takes the byte at the highest address */
+	ldd r24, Y+0*4+2
+	ldd r23, Y+0*4+1
+	ldd r22, Y+0*4+0
+	
+	ldd r0, Y+2*4+3	/* add keystate[2]; the carry runs from byte 3 towards byte 0 (big-endian sum) */
+	add r25, r0
+	ldd r0, Y+2*4+2
+	adc r24, r0
+	ldd r0, Y+2*4+1
+	adc r23, r0
+	ldd r0, Y+2*4+0
+	adc r22, r0
+
+	sub r25, XRC3	/* subtract the round constant with the same byte order */
+	sbc r24, XRC2
+	sbc r23, XRC1
+	sbc r22, XRC0
+	rcall seed_g_function
+	mov D0, r22	/* save the result (k0) while r22..r25 are reused */
+	mov D1, r23
+	mov D2, r24
+	mov D3, r25
+	
+
+	ldd r25, Y+1*4+3	/* r22..r25 = keystate[1] */
+	ldd r24, Y+1*4+2
+	ldd r23, Y+1*4+1
+	ldd r22, Y+1*4+0
+
+	ldd r0, Y+3*4+3	/* subtract keystate[3] */
+	sub r25, r0
+	ldd r0, Y+3*4+2
+	sbc r24, r0
+	ldd r0, Y+3*4+1
+	sbc r23, r0
+	ldd r0, Y+3*4+0
+	sbc r22, r0
+
+	add r25, XRC3	/* add the round constant */
+	adc r24, XRC2
+	adc r23, XRC1
+	adc r22, XRC0
+	rcall seed_g_function	/* result (k1) stays in r22..r25 */
+
+	mov r21, D3	/* place k0 in r18..r21 */
+	mov r20, D2
+	mov r19, D1
+	mov r18, D0 
+	ret
+
+seed_getnextkeys:	/* in: r24:r25 = keystate, r22 = curround; returns the key pair produced by compute_keys, then rotates one 8-byte half of the key state */
+	push_range 10, 17
+	push r28
+	push r29
+;	andi r22, 0x0F
+	bst r22,0	/* T = bit 0 of curround, saved before compute_keys overwrites r22 */
+	rcall compute_keys		
+	brtc even_round	/* NOTE(review): relies on seed_g_function leaving the T flag untouched -- confirm */
+odd_round:
+
+	adiw r28, 8	/* compute_keys left Y = keystate; now Y = keystate + 8 */
+	ld r26, Y	/* odd round: move bytes 9..15 one address down, old byte 8 goes to byte 15 */
+	ldd r0, Y+1
+	std Y+0, r0
+	ldd r0, Y+2
+	std Y+1, r0
+	ldd r0, Y+3
+	std Y+2, r0
+	ldd r0, Y+4
+	std Y+3, r0
+	ldd r0, Y+5
+	std Y+4, r0
+	ldd r0, Y+6
+	std Y+5, r0
+	ldd r0, Y+7
+	std Y+6, r0
+	std Y+7, r26	
+/*
+	movw r30, r28
+	ld r26, Z+
+	ldi r27, 7
+1:
+	ld r0, Z+
+	st Y+, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/	
+	rjmp 4f
+
+even_round:
+
+	ldd r26, Y+7	/* even round: move bytes 0..6 one address up, old byte 7 goes to byte 0 */
+	ldd r0, Y+6
+	std Y+7, r0
+	ldd r0, Y+5
+	std Y+6, r0
+	ldd r0, Y+4
+	std Y+5, r0
+	ldd r0, Y+3
+	std Y+4, r0
+	ldd r0, Y+2
+	std Y+3, r0
+	ldd r0, Y+1
+	std Y+2, r0
+	ldd r0, Y+0
+	std Y+1, r0
+	std Y+0, r26
+/*
+	adiw r28, 7	
+	ld r26, Y
+	ldi r27, 7	
+1:
+	ld r0, -Y
+	std Y+1, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/
+4:	
+	pop r29	/* restore the caller's Y and r10..r17; r18..r25 still hold the key pair */
+	pop r28
+	pop_range 10, 17
+	ret
+
+/******************************************************************************/
+/*
+keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
+	keypair_t ret;
+	if (curround>15){
+		/ * ERROR * /
+		ret.k0 = ret.k1 = 0;
+	} else {
+		if (curround & 1){
+			/ * odd round (1,3,5, ..., 15) * /
+			((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/ * even round (0,2,4, ..., 14) * /
+			((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
+		}
+	/ *	ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = seed_g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = seed_g_function(ret.k1);
+		}
+	return ret;
+}
+*/
+/*
+ * param keystate: r24:r25
+ * param curround: r22
+ */
+
+seed_getprevkeys:	/* in: r24:r25 = keystate, r22 = curround; first rotates one 8-byte half of the key state back, then returns the key pair from compute_keys */
+	push_range 10, 17
+	push r28
+	push r29
+	movw r28, r24	/* Y = keystate */
+;	andi r22, 0x0F
+	bst r22, 0	/* T = bit 0 of curround */
+	brts r_odd_round
+r_even_round:
+	ldd r26, Y+0	/* even round: move bytes 1..7 one address down, old byte 0 goes to byte 7 */
+	ldd r0, Y+1
+	std Y+0, r0
+	ldd r0, Y+2
+	std Y+1, r0
+	ldd r0, Y+3
+	std Y+2, r0
+	ldd r0, Y+4
+	std Y+3, r0
+	ldd r0, Y+5
+	std Y+4, r0
+	ldd r0, Y+6
+	std Y+5, r0
+	ldd r0, Y+7
+	std Y+6, r0
+	std Y+7, r26	
+/*
+	movw r30, r28
+	ld r26, Z+
+	ldi r27, 7
+1:
+	ld r0, Z+
+	st Y+, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/	
+
+	rjmp 4f
+r_odd_round:
+	ldd r26, Y+8+7	/* odd round: move bytes 8..14 one address up, old byte 15 goes to byte 8 */
+	ldd r0, Y+8+6
+	std Y+8+7, r0
+	ldd r0, Y+8+5
+	std Y+8+6, r0
+	ldd r0, Y+8+4
+	std Y+8+5, r0
+	ldd r0, Y+8+3
+	std Y+8+4, r0
+	ldd r0, Y+8+2
+	std Y+8+3, r0
+	ldd r0, Y+8+1
+	std Y+8+2, r0
+	ldd r0, Y+8+0
+	std Y+8+1, r0
+	std Y+8+0, r26
+/*
+	adiw r28, 7	
+	ld r26, Y
+	ldi r27, 7	
+1:
+	ld r0, -Y
+	std Y+1, r0
+	dec r27
+	brne 1b
+	st Y, r26
+*/
+4:
+	rcall compute_keys	/* r24:r25 and r22 are still the original arguments; the rotation only used r0, r26 and Y */
+
+	pop r29
+	pop r28
+	pop_range 10, 17
+	ret
+
+/******************************************************************************/
+
+seed_kc:	/* 16 round constants, one 4-byte entry per round; compute_keys reads entry curround with lpm */
+.long   0xb979379e 
+.long   0x73f36e3c
+.long   0xe6e6dd78 
+.long   0xcccdbbf1 
+.long   0x999b77e3 
+.long   0x3337efc6 
+.long   0x676ede8d 
+.long   0xcfdcbc1b 
+.long   0x9eb97937
+.long   0x3c73f36e	
+.long   0x78e6e6dd
+.long   0xf1cccdbb
+.long   0xe3999b77
+.long   0xc63337ef
+.long   0x8d676ede
+.long   0x1bcfdcbc
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_enc(void * buffer, seed_ctx_t * ctx){
+	uint8_t r;
+	keypair_t k;
+	for(r=0; r<8; ++r){
+			k = seed_getnextkeys(ctx->k, 2*r);
+/ *
+	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+			L ^= seed_f_function(&R,k.k0,k.k1);
+			
+			k = seed_getnextkeys(ctx->k, 2*r+1);
+/ *
+	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+			R ^= seed_f_function(&L,k.k0,k.k1);
+	}
+	/ * just an exchange without temp. variable * /
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+*/
+/*
+ * param buffer: r24:r25
+ * param ctx:    r22:r23
+ */
+CTR  = 9	/* r9: loop counter, one count per pair of rounds */
+xLPTR = 10	/* r10:r11: pointer to the first 8 bytes of the block (L) */
+xRPTR = 12	/* r12:r13: pointer to the last 8 bytes of the block (R) */
+CPTR = 14	/* r14:r15: pointer to the context */
+
+.global seed_enc
+seed_enc:	/* in: r24:r25 = buffer, r22:r23 = ctx; works on the 16-byte buffer in place */
+	push_range 9, 17
+	push r28
+	push r29	
+	clr CTR
+	movw xLPTR, r24
+	adiw r24, 8
+	movw xRPTR, r24		
+	movw CPTR, r22
+1:
+	movw r28, xLPTR	/* Y = L, the target of the first XOR below */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22	/* round number 2*CTR */
+	rcall seed_getnextkeys	
+
+	/* move the returned key pair (r18..r21, r22..r25) into the registers seed_f_function reads */
+	movw r16, r22
+	movw r22, r18
+	movw r18, r24	/* r16..r19 = former r22..r25 */
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24	/* r20..r23 = former r18..r21 */
+	movw r24, xRPTR	/* r24:r25 = pointer to R */
+
+	rcall seed_f_function
+
+	ld r0, Y	/* XOR r18..r25 into the 8 bytes of L; NOTE(review): assumes seed_f_function preserves Y -- confirm */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	/* second half: Y has advanced by 8 and now addresses R */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22
+	inc r22	/* round number 2*CTR+1 */
+	rcall seed_getnextkeys	
+
+	movw r16, r22	/* same register shuffle as above */
+	movw r22, r18
+	movw r18, r24
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24
+	movw r24, xLPTR	/* r24:r25 = pointer to L */
+	
+	rcall seed_f_function
+
+	ld r0, Y	/* XOR r18..r25 into the 8 bytes of R */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	
+	inc CTR
+	bst CTR, 3	/* bit 3 becomes set when CTR reaches 8, i.e. after 16 rounds */
+	brts 3f
+	rjmp 1b
+3:
+	movw r28, xLPTR	/* final step: exchange L and R byte by byte */
+	movw r30, xRPTR
+	ldi r17, 8
+4:
+	ld r10, Y	/* r10/r11 serve as scratch here; the pointers they held are already in Y and Z */
+	ld r11, Z
+	st Z+, r10
+	st Y+, r11
+	dec r17
+	brne 4b
+5:	/* this label is not referenced inside seed_enc */
+	pop r29
+	pop r28
+	pop_range 9, 17
+	ret
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_dec(void * buffer, seed_ctx_t * ctx){
+	int8_t r;
+	keypair_t k;
+	for(r=7; r>=0; --r){
+			k = seed_getprevkeys(ctx->k, 2*r+1);
+/ *
+	DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+			L ^= seed_f_function(&R,k.k0,k.k1);
+			
+			k = seed_getprevkeys(ctx->k, 2*r+0);
+/ *
+	DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+			R ^= seed_f_function(&L,k.k0,k.k1);
+	}
+	/ * just an exchange without temp. variable * /
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+*/
+/*
+ * param buffer: r24:r25
+ * param ctx:    r22:r23
+ */
+CTR  = 9	/* r9: loop counter, counts down from 7 */
+xLPTR = 10	/* r10:r11: pointer to the first 8 bytes of the block (L) */
+xRPTR = 12	/* r12:r13: pointer to the last 8 bytes of the block (R) */
+CPTR = 14	/* r14:r15: pointer to the context */
+
+.global seed_dec
+seed_dec:	/* in: r24:r25 = buffer, r22:r23 = ctx; works on the 16-byte buffer in place */
+	push_range 9, 17
+	push r28
+	push r29	
+	ldi r16, 7	/* ldi needs an upper register, so 7 is loaded via r16 */
+	mov CTR, r16
+	movw xLPTR, r24
+	adiw r24, 8
+	movw xRPTR, r24		
+	movw CPTR, r22
+1:
+	movw r28, xLPTR	/* Y = L, the target of the first XOR below */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22
+	inc r22	/* round number 2*CTR+1 */
+	rcall seed_getprevkeys	
+
+	/* move the returned key pair (r18..r21, r22..r25) into the registers seed_f_function reads */
+	movw r16, r22
+	movw r22, r18
+	movw r18, r24	/* r16..r19 = former r22..r25 */
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24	/* r20..r23 = former r18..r21 */
+	movw r24, xRPTR	/* r24:r25 = pointer to R */
+
+	rcall seed_f_function
+
+	ld r0, Y	/* XOR r18..r25 into the 8 bytes of L; NOTE(review): assumes seed_f_function preserves Y -- confirm */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	/* second half: Y has advanced by 8 and now addresses R */
+	movw r24, CPTR
+	mov r22, CTR
+	lsl r22	/* round number 2*CTR */
+	rcall seed_getprevkeys	
+
+	movw r16, r22	/* same register shuffle as above */
+	movw r22, r18
+	movw r18, r24
+	movw r24, r20
+	movw r20, r22
+	movw r22, r24
+	movw r24, xLPTR	/* r24:r25 = pointer to L */
+	
+	rcall seed_f_function
+
+	ld r0, Y	/* XOR r18..r25 into the 8 bytes of R */
+	eor r0, r18
+	st Y+, r0
+	ld r0, Y
+	eor r0, r19
+	st Y+, r0
+	ld r0, Y
+	eor r0, r20
+	st Y+, r0
+	ld r0, Y
+	eor r0, r21
+	st Y+, r0
+	ld r0, Y
+	eor r0, r22
+	st Y+, r0
+	ld r0, Y
+	eor r0, r23
+	st Y+, r0
+	ld r0, Y
+	eor r0, r24
+	st Y+, r0
+	ld r0, Y
+	eor r0, r25
+	st Y+, r0
+	
+	dec CTR
+	brmi 3f	/* leave the loop once CTR has gone below zero */
+	rjmp 1b
+3:
+	movw r28, xLPTR	/* final step: exchange L and R byte by byte */
+	movw r30, xRPTR
+	ldi r17, 8
+4:
+	ld r10, Y	/* r10/r11 serve as scratch here; the pointers they held are already in Y and Z */
+	ld r11, Z
+	st Z+, r10
+	st Y+, r11
+	dec r17
+	brne 4b
+5:	/* this label is not referenced inside seed_dec */
+	pop r29
+	pop r28
+	pop_range 9, 17
+	ret
+
diff --git a/seed/seed.h b/seed/seed.h
new file mode 100644
index 0000000..78d7d59
--- /dev/null
+++ b/seed/seed.h
@@ -0,0 +1,81 @@
+/* seed.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	seed.h
+ * \author	Daniel Otte 
+ * \date	2007-06-1
+ * \brief 	declarations for seed
+ * \par License	
+ * GPL
+ * 
+ */
+#ifndef SEED_H_
+#define SEED_H_
+
+#include <stdint.h>
+/** \typedef seed_ctx_t
+ * \brief SEED context
+ * 
+ * A variable of this type may hold the key material for the SEED cipher. 
+ * This context is normally initialized by the 
+ * void seed_init(const void * key, seed_ctx_t * ctx) function.
+ */
+typedef struct{
+	uint32_t k[4];	/* 4 x 32 bit = 128 bit of key state */
+} seed_ctx_t;
+
+/******************************************************************************/
+
+/** \fn void seed_init(const void * key, seed_ctx_t * ctx)
+ * \brief initializes context for SEED operation
+ * 
+ * This function copies the key material into a context variable.
+ * 
+ * \param key  pointer to the key material (128 bit = 16 bytes)
+ * \param ctx  pointer to the context (seed_ctx_t)
+ */
+void seed_init(const void * key, seed_ctx_t * ctx);
+
+/** \fn void seed_enc(void * buffer,const seed_ctx_t * ctx)
+ * \brief encrypt a block with SEED
+ * 
+ * This function encrypts a block of 128 bits (16 bytes) with the SEED algorithm.
+ * The round keys are computed on demand, so the context is modified while
+ * encrypting but the original state is restored when the function exits.
+ * 
+ * \param buffer pointer to the block (128 bit = 16 byte) which will be encrypted
+ * \param ctx    pointer to the key material (seed_ctx_t)
+ */
+void seed_enc(void * buffer, const seed_ctx_t * ctx);
+
+
+/** \fn void seed_dec(void * buffer, const seed_ctx_t * ctx)
+ * \brief decrypt a block with SEED
+ * 
+ * This function decrypts a block of 128 bits (16 bytes) with the SEED algorithm.
+ * The round keys are computed on demand, so the context is modified while
+ * decrypting but the original state is restored when the function exits.
+ * 
+ * \param buffer pointer to the block (128 bit = 16 byte) which will be decrypted
+ * \param ctx    pointer to the key material (seed_ctx_t)
+ */
+void seed_dec(void * buffer, const seed_ctx_t * ctx);
+
+	
+#endif /*SEED_H_*/
diff --git a/seed/seed_C.c b/seed/seed_C.c
new file mode 100644
index 0000000..7a9afc0
--- /dev/null
+++ b/seed/seed_C.c
@@ -0,0 +1,288 @@
+/* seed_C.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+ /**
+ * \file	seed_C.c
+ * \author	Daniel Otte 
+ * \date	2007-06-1
+ * \brief	SEED parts in C for AVR
+ * \par License	
+ * GPL
+ * 
+ */
+#include <stdint.h>
+#include <avr/pgmspace.h>
+#include <string.h>
+#include "seed_sbox.h"
+#include "cli.h"
+#include "debug.h"
+
+
+static
+uint32_t g_function(uint32_t x);
+/******************************************************************************/
+
+static
+void changeendian32(uint32_t * a){	/* reverses the byte order of *a in place */
+	*a = (*a & 0x000000FF) << 24 |	/* lowest byte becomes the highest */
+		 (*a & 0x0000FF00) <<  8 |	/* second byte moves up by one */
+		 (*a & 0x00FF0000) >>  8 |	/* third byte moves down by one */
+		 (*a & 0xFF000000) >> 24;	/* highest byte becomes the lowest */
+}
+
+/******************************************************************************/
+static
+uint32_t bigendian_sum32(uint32_t a, uint32_t b){	/* adds a and b after byte-swapping both, and returns the byte-swapped sum */
+	changeendian32(&a);
+	changeendian32(&b);
+	a += b;	/* unsigned 32-bit addition, wraps modulo 2^32 */
+	changeendian32(&a);
+	return a;
+}
+
+/******************************************************************************/
+static 
+uint32_t bigendian_sub32(uint32_t a, uint32_t b){	/* subtracts b from a after byte-swapping both, and returns the byte-swapped difference */
+	changeendian32(&a);
+	changeendian32(&b);
+	a -= b;	/* unsigned 32-bit subtraction, wraps modulo 2^32 */
+	changeendian32(&a);
+	return a;
+}
+
+/******************************************************************************/
+static inline
+uint64_t bigendian_rotl8_64(uint64_t a){	/* rotates a by 8 bits; "left" refers to the byte-swapped view shown in the disabled code below */
+	/*
+	changeendian64(&a);
+	a = (a<<8) | (a>>(64-8));
+	changeendian64(&a);
+	*/
+	a = (a>>8) | (a<<(64-8));	/* on the value as stored this is a right rotation by one byte */
+	return a;
+}
+
+/******************************************************************************/
+static inline
+uint64_t bigendian_rotr8_64(uint64_t a){	/* rotates a by 8 bits; "right" refers to the byte-swapped view shown in the disabled code below */
+	/*
+	changeendian64(&a);
+	a = (a>>8) | (a<<(64-8));
+	changeendian64(&a);
+	*/
+	a = (a<<8) | (a>>(64-8));	/* on the value as stored this is a left rotation by one byte */
+	return a;
+}
+
+/******************************************************************************/
+static
+uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){	/* round function: combines the two 32-bit halves of *a with the key pair k0/k1 */
+	uint32_t c,d;
+
+	c = *a & 0x00000000FFFFFFFFLL;	/* low 32 bits of *a */
+	d = (*a>>32) & 0x00000000FFFFFFFFLL;	/* high 32 bits of *a */
+	
+	c ^= k0; d ^= k1;
+	d ^= c;
+	d = g_function(d);	/* three g_function steps, each followed by a bigendian_sum32 of c and d */
+	c = bigendian_sum32(c,d);
+	c = g_function(c);
+	d = bigendian_sum32(c,d);
+	d = g_function(d);
+	c = bigendian_sum32(c,d);
+	return ((uint64_t)d << 32) | c;	/* d in the high half, c in the low half */
+}
+
+/******************************************************************************/
+#define M0 0xfc	/* M0..M3: bit masks used by the mixing step of g_function */
+#define M1 0xf3
+#define M2 0xcf
+#define M3 0x3f
+
+#define X3 (((uint8_t*)(&x))[0])	/* X3..X0: the bytes of x, X3 being the one at the lowest address */
+#define X2 (((uint8_t*)(&x))[1])
+#define X1 (((uint8_t*)(&x))[2])
+#define X0 (((uint8_t*)(&x))[3])
+
+#define Z3 (((uint8_t*)(&z))[0])	/* Z3..Z0: the bytes of z, addressed the same way */
+#define Z2 (((uint8_t*)(&z))[1])
+#define Z1 (((uint8_t*)(&z))[2])
+#define Z0 (((uint8_t*)(&z))[3])
+
+static
+uint32_t g_function(uint32_t x){	/* replaces each byte of x through an S-box, then mixes the four bytes into the result z */
+	uint32_t z;
+	/* sbox substitution: X3 and X1 use seed_sbox2, X2 and X0 use seed_sbox1 */
+	X3 = pgm_read_byte(&(seed_sbox2[X3]));
+	X2 = pgm_read_byte(&(seed_sbox1[X2]));
+	X1 = pgm_read_byte(&(seed_sbox2[X1]));
+	X0 = pgm_read_byte(&(seed_sbox1[X0]));
+	/* now the permutation: every output byte XORs all four input bytes, each under a different mask */
+	Z0 = (X0 & M0) ^ (X1 & M1) ^ (X2 & M2) ^ (X3 & M3);
+	Z1 = (X0 & M1) ^ (X1 & M2) ^ (X2 & M3) ^ (X3 & M0);
+	Z2 = (X0 & M2) ^ (X1 & M3) ^ (X2 & M0) ^ (X3 & M1);
+	Z3 = (X0 & M3) ^ (X1 & M0) ^ (X2 & M1) ^ (X3 & M2);
+	return z;
+}
+/******************************************************************************/
+typedef struct {	/* pair of 32-bit round keys as returned by getnextkeys/getprevkeys */
+	uint32_t k0, k1;
+} keypair_t;
+
+keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){	/* returns the key pair for curround, then rotates one 64-bit half of keystate in place */
+	keypair_t ret;
+	if (curround>15){
+		/* ERROR: curround is above 15; a zero key pair is returned and keystate is left untouched */
+		ret.k0 = ret.k1 = 0;
+	} else {
+	/*	ret.k0 = g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); */
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = g_function(ret.k1);
+		
+		if (curround & 1){
+			/* odd round (1,3,5, ...): rotate the second 64-bit half (keystate[2], keystate[3]) */
+			((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/* even round (0,2,4, ...): rotate the first 64-bit half (keystate[0], keystate[1]) */
+			((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
+		}
+	}
+	return ret;
+}
+
+
+/******************************************************************************/
+
+keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){	/* rotates one 64-bit half of keystate back in place, then returns the key pair for curround */
+	keypair_t ret;
+	if (curround>15){
+		/* ERROR: curround is above 15; a zero key pair is returned and keystate is left untouched */
+		ret.k0 = ret.k1 = 0;
+	} else {
+		if (curround & 1){
+			/* odd round (1,3,5, ..., 15): rotate the second 64-bit half, opposite direction to getnextkeys */
+			((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
+		} else {
+			/* even round (0,2,4, ..., 14): rotate the first 64-bit half, opposite direction to getnextkeys */
+			((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
+		}
+	/*	ret.k0 = g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); */
+		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+		ret.k0 = g_function(ret.k0);
+		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+		ret.k1 = g_function(ret.k1);
+		}
+	return ret;
+}
+
+/******************************************************************************/
+
+typedef struct{	/* SEED context: the key state that the key functions rotate in place */
+	uint32_t k[4];	/* 4 x 32 bit = 128 bit */
+} seed_ctx_t;
+
+/******************************************************************************/
+
+void seed_init(const void * key, seed_ctx_t * ctx){	/* copies the 16-byte key into the context */
+	memcpy(ctx->k, key, 128/8);
+}
+
+/******************************************************************************/
+
+#define L (((uint64_t*)buffer)[0])	/* first 8 bytes of the 16-byte block */
+#define R (((uint64_t*)buffer)[1])	/* last 8 bytes of the 16-byte block */
+
+void seed_enc(void * buffer, const seed_ctx_t * ctx){	/* encrypts the 16-byte block at buffer in place */
+	uint8_t r;
+	keypair_t k;
+	for(r=0; r<8; ++r){	/* two rounds per iteration, rounds 0..15 in ascending order */
+			k = getnextkeys(((seed_ctx_t*)ctx)->k, 2*r);	/* the cast drops const: getnextkeys rotates the key state in place */
+/*
+	DEBUG_S("\r\n\tDBG ka,0: "); cli_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); cli_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+8, 8);
+*/
+			L ^= f_function(&R,k.k0,k.k1);
+			
+			k = getnextkeys(((seed_ctx_t*)ctx)->k, 2*r+1);
+/*
+	DEBUG_S("\r\n\tDBG kb,0: "); cli_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); cli_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+0, 8);
+*/
+			R ^= f_function(&L,k.k0,k.k1);
+	}
+	/* exchange L and R with three XORs instead of a temporary variable */
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+
+/******************************************************************************/
+
+#define L (((uint64_t*)buffer)[0])	/* first 8 bytes of the 16-byte block */
+#define R (((uint64_t*)buffer)[1])	/* last 8 bytes of the 16-byte block */
+
+void seed_dec(void * buffer, const seed_ctx_t * ctx){	/* decrypts the 16-byte block at buffer in place; ctx is const-qualified like seed_enc and the prototype in seed.h */
+	int8_t r;
+	keypair_t k;
+	for(r=7; r>=0; --r){	/* two rounds per iteration, rounds 15..0 in descending order */
+			k = getprevkeys(((seed_ctx_t*)ctx)->k, 2*r+1);	/* the cast drops const: getprevkeys rotates the key state in place */
+/*
+	DEBUG_S("\r\n\tDBG ka,0: "); cli_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG ka,1: "); cli_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+0, 8);
+	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+8, 8);
+*/
+			L ^= f_function(&R,k.k0,k.k1);
+			
+			k = getprevkeys(((seed_ctx_t*)ctx)->k, 2*r+0);
+/*
+	DEBUG_S("\r\n\tDBG kb,0: "); cli_hexdump(&k.k0, 4);
+	DEBUG_S("\r\n\tDBG kb,1: "); cli_hexdump(&k.k1, 4);
+	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+8, 8);
+	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+0, 8);
+*/
+			R ^= f_function(&L,k.k0,k.k1);
+	}
+	/* exchange L and R with three XORs instead of a temporary variable */
+	L ^= R;
+	R ^= L;
+	L ^= R;
+}
+
+
+
+
+
+
+
+
+
+
+
diff --git a/seed/seed_sbox.h b/seed/seed_sbox.h
new file mode 100644
index 0000000..9937913
--- /dev/null
+++ b/seed/seed_sbox.h
@@ -0,0 +1,125 @@
+/* seed_sbox.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	seed_sbox.h
+ * \author	Daniel Otte 
+ * \date	2007-06-1
+ * \brief 	sboxes and constants for seed
+ * \par License	
+ * GPL
+ * 
+ */
+ 
+#ifndef SEED_SBOX_H_
+#define SEED_SBOX_H_
+
+#include <stdint.h>
+#include <avr/pgmspace.h>
+
+const uint8_t seed_sbox1[256] PROGMEM ={	/* S-box 1, 256 entries in flash (read with pgm_read_byte); const because PROGMEM data must be const-qualified for current avr-gcc */
+	169,  133,  214,  211,   84,   29,  172,   37, 
+	 93,   67,   24,   30,   81,  252,  202,   99, 
+	 40,   68,   32,  157,  224,  226,  200,   23, 
+	165,  143,    3,  123,  187,   19,  210,  238, 
+	112,  140,   63,  168,   50,  221,  246,  116, 
+	236,  149,   11,   87,   92,   91,  189,    1, 
+	 36,   28,  115,  152,   16,  204,  242,  217, 
+	 44,   231, 114,  131,  155,  209,  134,  201, 
+	 96,   80,  163,  235,   13,  182,  158,   79, 
+	183,   90,  198,  120,  166,   18,  175,  213, 
+	 97,  195,  180,   65,   82,  125,  141,    8, 
+	 31,  153,    0,   25,    4,   83,  247,  225, 
+	253,  118,   47,   39,  176,  139,   14,  171, 
+	162,  110,  147,   77,  105,  124,    9,   10, 
+	191,  239,  243,  197,  135,   20,  254,  100, 
+	222,   46,   75,   26,    6,   33,  107,  102, 
+	  2,  245,  146,  138,   12,  179,  126,  208, 
+	122,   71,  150,  229,   38,  128,  173,  223, 
+	161,   48,   55,  174,   54,   21,   34,   56, 
+	244,  167,   69,   76,  129,  233,  132,  151, 
+	 53,  203,  206,   60,  113,   17,  199,  137, 
+	117,  251,  218,  248,  148,   89,  130,  196, 
+	255,   73,   57,  103,  192,  207,  215,  184, 
+	 15,  142,   66,   35,  145,  108,  219,  164, 
+	 52,  241,   72,  194,  111,   61,   45,   64, 
+	190,   62,  188,  193,  170,  186,   78,   85, 
+	 59,  220,  104,  127,  156,  216,   74,   86, 
+	119,  160,  237,   70,  181,   43,  101,  250, 
+	227,  185,  177,  159,   94,  249,  230,  178, 
+	 49,  234,  109,   95,  228,  240,  205,  136, 
+	 22,   58,   88,  212,   98,   41,    7,   51, 
+	232,   27,    5,  121,  144,  106,   42,  154
+};
+
+const uint8_t seed_sbox2[256] PROGMEM ={	/* S-box 2, 256 entries in flash (read with pgm_read_byte); const because PROGMEM data must be const-qualified for current avr-gcc */
+	 56,  232,   45,  166,  207,  222,  179,  184, 
+	175,   96,   85,  199,   68,  111,  107,   91, 
+	195,   98,   51,  181,   41,  160,  226,  167, 
+	211,  145,   17,    6,   28,  188,   54,   75, 
+	239,  136,  108,  168,   23,  196,   22,  244, 
+	194,   69,  225,  214,   63,   61,  142,  152, 
+	 40,   78,  246,   62,  165,  249,   13,  223, 
+	216,   43,  102,  122,   39,   47,  241,  114, 
+	 66,  212,   65,  192,  115,  103,  172,  139, 
+	247,  173,  128,   31,  202,   44,  170,   52, 
+	210,   11,  238,  233,   93,  148,   24,  248, 
+	 87,  174,    8,  197,   19,  205,  134,  185, 
+	255,  125,  193,   49,  245,  138,  106,  177, 
+	209,   32,  215,    2,   34,    4,  104,  113, 
+	  7,  219,  157,  153,   97,  190,  230,   89, 
+	221,   81,  144,  220,  154,  163,  171,  208, 
+	129,   15,   71,   26,  227,  236,  141,  191, 
+	150,  123,   92,  162,  161,   99,   35,   77, 
+	200,  158,  156,   58,   12,   46,  186,  110, 
+	159,   90,  242,  146,  243,   73,  120,  204, 
+	 21,  251,  112,  117,  127,   53,   16,    3, 
+	100,  109,  198,  116,  213,  180,  234,    9, 
+	118,   25,  254,   64,   18,  224,  189,    5, 
+	250,    1,  240,   42,   94,  169,   86,   67, 
+	133,   20,  137,  155,  176,  229,   72,  121, 
+	151,  252,   30,  130,   33,  140,   27,   95, 
+	119,   84,  178,   29,   37,   79,    0,   70, 
+	237,   88,   82,  235,  126,  218,  201,  253, 
+	 48,  149,  101,   60,  182,  228,  187,  124, 
+	 14,   80,   57,   38,   50,  132,  105,  147, 
+	 55,  231,   36,  164,  203,   83,   10,  135, 
+	217,   76,  131,  143,  206,   59,   74,  183 
+};
+
+/* key constants */
+const uint32_t seed_kc[16] PROGMEM ={	/* one round constant per round 0..15, in flash (read with pgm_read_dword); const because PROGMEM data must be const-qualified for current avr-gcc */
+	0xb979379e, 
+	0x73f36e3c,
+	0xe6e6dd78, 
+	0xcccdbbf1, 
+	0x999b77e3, 
+	0x3337efc6, 
+	0x676ede8d, 
+	0xcfdcbc1b, 
+	0x9eb97937,
+	0x3c73f36e,	
+	0x78e6e6dd,
+	0xf1cccdbb,
+	0xe3999b77,
+	0xc63337ef,
+	0x8d676ede,
+	0x1bcfdcbc
+};
+
+#endif /*SEED_SBOX_H_*/
diff --git a/seed_C.c b/seed_C.c
deleted file mode 100644
index 7a9afc0..0000000
--- a/seed_C.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/* seed_C.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
- /**
- * \file	seed_C.c
- * \author	Daniel Otte 
- * \date	2007-06-1
- * \brief	SEED parts in C for AVR
- * \par License	
- * GPL
- * 
- */
-#include <stdint.h>
-#include <avr/pgmspace.h>
-#include <string.h>
-#include "seed_sbox.h"
-#include "cli.h"
-#include "debug.h"
-
-
-static
-uint32_t g_function(uint32_t x);
-/******************************************************************************/
-
-static
-void changeendian32(uint32_t * a){
-	*a = (*a & 0x000000FF) << 24 |
-		 (*a & 0x0000FF00) <<  8 |
-		 (*a & 0x00FF0000) >>  8 |
-		 (*a & 0xFF000000) >> 24;
-}
-
-/******************************************************************************/
-static
-uint32_t bigendian_sum32(uint32_t a, uint32_t b){
-	changeendian32(&a);
-	changeendian32(&b);
-	a += b;
-	changeendian32(&a);
-	return a;
-}
-
-/******************************************************************************/
-static 
-uint32_t bigendian_sub32(uint32_t a, uint32_t b){
-	changeendian32(&a);
-	changeendian32(&b);
-	a -= b;
-	changeendian32(&a);
-	return a;
-}
-
-/******************************************************************************/
-static inline
-uint64_t bigendian_rotl8_64(uint64_t a){
-	/*
-	changeendian64(&a);
-	a = (a<<8) | (a>>(64-8));
-	changeendian64(&a);
-	*/
-	a = (a>>8) | (a<<(64-8));
-	return a;
-}
-
-/******************************************************************************/
-static inline
-uint64_t bigendian_rotr8_64(uint64_t a){
-	/*
-	changeendian64(&a);
-	a = (a>>8) | (a<<(64-8));
-	changeendian64(&a);
-	*/
-	a = (a<<8) | (a>>(64-8));
-	return a;
-}
-
-/******************************************************************************/
-static
-uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){
-	uint32_t c,d;
-
-	c = *a & 0x00000000FFFFFFFFLL;
-	d = (*a>>32) & 0x00000000FFFFFFFFLL;
-	
-	c ^= k0; d ^= k1;
-	d ^= c;
-	d = g_function(d);
-	c = bigendian_sum32(c,d);
-	c = g_function(c);
-	d = bigendian_sum32(c,d);
-	d = g_function(d);
-	c = bigendian_sum32(c,d);
-	return ((uint64_t)d << 32) | c;
-}
-
-/******************************************************************************/
-#define M0 0xfc
-#define M1 0xf3
-#define M2 0xcf
-#define M3 0x3f
-
-#define X3 (((uint8_t*)(&x))[0])
-#define X2 (((uint8_t*)(&x))[1])
-#define X1 (((uint8_t*)(&x))[2])
-#define X0 (((uint8_t*)(&x))[3])
-
-#define Z3 (((uint8_t*)(&z))[0])
-#define Z2 (((uint8_t*)(&z))[1])
-#define Z1 (((uint8_t*)(&z))[2])
-#define Z0 (((uint8_t*)(&z))[3])
-
-static
-uint32_t g_function(uint32_t x){
-	uint32_t z;
-	/* sbox substitution */
-	X3 = pgm_read_byte(&(seed_sbox2[X3]));
-	X2 = pgm_read_byte(&(seed_sbox1[X2]));
-	X1 = pgm_read_byte(&(seed_sbox2[X1]));
-	X0 = pgm_read_byte(&(seed_sbox1[X0]));
-	/* now the permutation */
-	Z0 = (X0 & M0) ^ (X1 & M1) ^ (X2 & M2) ^ (X3 & M3);
-	Z1 = (X0 & M1) ^ (X1 & M2) ^ (X2 & M3) ^ (X3 & M0);
-	Z2 = (X0 & M2) ^ (X1 & M3) ^ (X2 & M0) ^ (X3 & M1);
-	Z3 = (X0 & M3) ^ (X1 & M0) ^ (X2 & M1) ^ (X3 & M2);
-	return z;
-}
-/******************************************************************************/
-typedef struct {
-	uint32_t k0, k1;
-} keypair_t;
-
-keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){
-	keypair_t ret;
-	if (curround>15){
-		/* ERROR */
-		ret.k0 = ret.k1 = 0;
-	} else {
-	/*	ret.k0 = g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); */
-		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
-		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
-		ret.k0 = g_function(ret.k0);
-		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
-		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = g_function(ret.k1);
-		
-		if (curround & 1){
-			/* odd round (1,3,5, ...) */
-			((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
-		} else {
-			/* even round (0,2,4, ...) */
-			((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
-		}
-	}
-	return ret;
-}
-
-
-/******************************************************************************/
-
-keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
-	keypair_t ret;
-	if (curround>15){
-		/* ERROR */
-		ret.k0 = ret.k1 = 0;
-	} else {
-		if (curround & 1){
-			/* odd round (1,3,5, ..., 15) */
-			((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
-		} else {
-			/* even round (0,2,4, ..., 14) */
-			((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
-		}
-	/*	ret.k0 = g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); */
-		ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
-		ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
-		ret.k0 = g_function(ret.k0);
-		ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
-		ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
-		ret.k1 = g_function(ret.k1);
-		}
-	return ret;
-}
-
-/******************************************************************************/
-
-typedef struct{
-	uint32_t k[4];
-} seed_ctx_t;
-
-/******************************************************************************/
-
-void seed_init(const void * key, seed_ctx_t * ctx){
-	memcpy(ctx->k, key, 128/8);
-}
-
-/******************************************************************************/
-
-#define L (((uint64_t*)buffer)[0])
-#define R (((uint64_t*)buffer)[1])
-
-void seed_enc(void * buffer, const seed_ctx_t * ctx){
-	uint8_t r;
-	keypair_t k;
-	for(r=0; r<8; ++r){
-			k = getnextkeys(((seed_ctx_t*)ctx)->k, 2*r);
-/*
-	DEBUG_S("\r\n\tDBG ka,0: "); cli_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG ka,1: "); cli_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+0, 8);
-	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+8, 8);
-*/
-			L ^= f_function(&R,k.k0,k.k1);
-			
-			k = getnextkeys(((seed_ctx_t*)ctx)->k, 2*r+1);
-/*
-	DEBUG_S("\r\n\tDBG kb,0: "); cli_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG kb,1: "); cli_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+8, 8);
-	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+0, 8);
-*/
-			R ^= f_function(&L,k.k0,k.k1);
-	}
-	/* just an exchange without temp. variable */
-	L ^= R;
-	R ^= L;
-	L ^= R;
-}
-
-/******************************************************************************/
-
-#define L (((uint64_t*)buffer)[0])
-#define R (((uint64_t*)buffer)[1])
-
-void seed_dec(void * buffer, seed_ctx_t * ctx){
-	int8_t r;
-	keypair_t k;
-	for(r=7; r>=0; --r){
-			k = getprevkeys(((seed_ctx_t*)ctx)->k, 2*r+1);
-/*
-	DEBUG_S("\r\n\tDBG ka,0: "); cli_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG ka,1: "); cli_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+0, 8);
-	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+8, 8);
-*/
-			L ^= f_function(&R,k.k0,k.k1);
-			
-			k = getprevkeys(((seed_ctx_t*)ctx)->k, 2*r+0);
-/*
-	DEBUG_S("\r\n\tDBG kb,0: "); cli_hexdump(&k.k0, 4);
-	DEBUG_S("\r\n\tDBG kb,1: "); cli_hexdump(&k.k1, 4);
-	DEBUG_S("\r\n\t DBG L: "); cli_hexdump((uint8_t*)buffer+8, 8);
-	DEBUG_S("\r\n\t DBG R: "); cli_hexdump((uint8_t*)buffer+0, 8);
-*/
-			R ^= f_function(&L,k.k0,k.k1);
-	}
-	/* just an exchange without temp. variable */
-	L ^= R;
-	R ^= L;
-	L ^= R;
-}
-
-
-
-
-
-
-
-
-
-
-
diff --git a/seed_sbox.h b/seed_sbox.h
deleted file mode 100644
index 9937913..0000000
--- a/seed_sbox.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* seed_sbox.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	seed_sbox.h
- * \author	Daniel Otte 
- * \date	2007-06-1
- * \brief 	sboxes and constants for seed
- * \par License	
- * GPL
- * 
- */
- 
-#ifndef SEED_SBOX_H_
-#define SEED_SBOX_H_
-
-#include <stdint.h>
-#include <avr/pgmspace.h>
-
-uint8_t seed_sbox1[256] PROGMEM ={
-	169,  133,  214,  211,   84,   29,  172,   37, 
-	 93,   67,   24,   30,   81,  252,  202,   99, 
-	 40,   68,   32,  157,  224,  226,  200,   23, 
-	165,  143,    3,  123,  187,   19,  210,  238, 
-	112,  140,   63,  168,   50,  221,  246,  116, 
-	236,  149,   11,   87,   92,   91,  189,    1, 
-	 36,   28,  115,  152,   16,  204,  242,  217, 
-	 44,   231, 114,  131,  155,  209,  134,  201, 
-	 96,   80,  163,  235,   13,  182,  158,   79, 
-	183,   90,  198,  120,  166,   18,  175,  213, 
-	 97,  195,  180,   65,   82,  125,  141,    8, 
-	 31,  153,    0,   25,    4,   83,  247,  225, 
-	253,  118,   47,   39,  176,  139,   14,  171, 
-	162,  110,  147,   77,  105,  124,    9,   10, 
-	191,  239,  243,  197,  135,   20,  254,  100, 
-	222,   46,   75,   26,    6,   33,  107,  102, 
-	  2,  245,  146,  138,   12,  179,  126,  208, 
-	122,   71,  150,  229,   38,  128,  173,  223, 
-	161,   48,   55,  174,   54,   21,   34,   56, 
-	244,  167,   69,   76,  129,  233,  132,  151, 
-	 53,  203,  206,   60,  113,   17,  199,  137, 
-	117,  251,  218,  248,  148,   89,  130,  196, 
-	255,   73,   57,  103,  192,  207,  215,  184, 
-	 15,  142,   66,   35,  145,  108,  219,  164, 
-	 52,  241,   72,  194,  111,   61,   45,   64, 
-	190,   62,  188,  193,  170,  186,   78,   85, 
-	 59,  220,  104,  127,  156,  216,   74,   86, 
-	119,  160,  237,   70,  181,   43,  101,  250, 
-	227,  185,  177,  159,   94,  249,  230,  178, 
-	 49,  234,  109,   95,  228,  240,  205,  136, 
-	 22,   58,   88,  212,   98,   41,    7,   51, 
-	232,   27,    5,  121,  144,  106,   42,  154
-};
-
-uint8_t seed_sbox2[256] PROGMEM ={
-	 56,  232,   45,  166,  207,  222,  179,  184, 
-	175,   96,   85,  199,   68,  111,  107,   91, 
-	195,   98,   51,  181,   41,  160,  226,  167, 
-	211,  145,   17,    6,   28,  188,   54,   75, 
-	239,  136,  108,  168,   23,  196,   22,  244, 
-	194,   69,  225,  214,   63,   61,  142,  152, 
-	 40,   78,  246,   62,  165,  249,   13,  223, 
-	216,   43,  102,  122,   39,   47,  241,  114, 
-	 66,  212,   65,  192,  115,  103,  172,  139, 
-	247,  173,  128,   31,  202,   44,  170,   52, 
-	210,   11,  238,  233,   93,  148,   24,  248, 
-	 87,  174,    8,  197,   19,  205,  134,  185, 
-	255,  125,  193,   49,  245,  138,  106,  177, 
-	209,   32,  215,    2,   34,    4,  104,  113, 
-	  7,  219,  157,  153,   97,  190,  230,   89, 
-	221,   81,  144,  220,  154,  163,  171,  208, 
-	129,   15,   71,   26,  227,  236,  141,  191, 
-	150,  123,   92,  162,  161,   99,   35,   77, 
-	200,  158,  156,   58,   12,   46,  186,  110, 
-	159,   90,  242,  146,  243,   73,  120,  204, 
-	 21,  251,  112,  117,  127,   53,   16,    3, 
-	100,  109,  198,  116,  213,  180,  234,    9, 
-	118,   25,  254,   64,   18,  224,  189,    5, 
-	250,    1,  240,   42,   94,  169,   86,   67, 
-	133,   20,  137,  155,  176,  229,   72,  121, 
-	151,  252,   30,  130,   33,  140,   27,   95, 
-	119,   84,  178,   29,   37,   79,    0,   70, 
-	237,   88,   82,  235,  126,  218,  201,  253, 
-	 48,  149,  101,   60,  182,  228,  187,  124, 
-	 14,   80,   57,   38,   50,  132,  105,  147, 
-	 55,  231,   36,  164,  203,   83,   10,  135, 
-	217,   76,  131,  143,  206,   59,   74,  183 
-};
-
-/* key constants */
-uint32_t seed_kc[16] PROGMEM ={
-	0xb979379e, 
-	0x73f36e3c,
-	0xe6e6dd78, 
-	0xcccdbbf1, 
-	0x999b77e3, 
-	0x3337efc6, 
-	0x676ede8d, 
-	0xcfdcbc1b, 
-	0x9eb97937,
-	0x3c73f36e,	
-	0x78e6e6dd,
-	0xf1cccdbb,
-	0xe3999b77,
-	0xc63337ef,
-	0x8d676ede,
-	0x1bcfdcbc
-};
-
-#endif /*SEED_SBOX_H_*/
diff --git a/serpent-asm.S b/serpent-asm.S
deleted file mode 100644
index f5f7cc5..0000000
--- a/serpent-asm.S
+++ /dev/null
@@ -1,754 +0,0 @@
-/* serpent-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/* 
- * File:        serpent-asm.S
- * Author:      Daniel Otte
- * Date:        2008-08-07
- * License:     GPLv3 or later
- * Description: Serpent linear transformation, key schedule, encryption and decryption (the s-boxes are implemented elsewhere).
- * 
- */
- 
-#include <avr/io.h>
-#include "avr-asm-macros.S"
-
-/*
-static void serpent_lt(uint8_t *b){
-	X0 = rotl32(X0, 13);
-	X2 = rotl32(X2,  3);
-	X1 ^= X0 ^ X2;
-	X3 ^= X2 ^ (X0 << 3);
-	X1 = rotl32(X1, 1);
-	X3 = rotl32(X3, 7);
-	X0 ^= X1 ^ X3;
-	X2 ^= X3 ^ (X1 << 7);
-	X0 = rotl32(X0, 5);
-	X2 = rotr32(X2, 10);
-}
-*/
-
-#if 0
-A0 =  4
-A1 =  5
-A2 =  6
-A3 =  7
-B0 =  8
-B1 =  9
-B2 = 10
-B3 = 11
-C0 = 12
-C1 = 13
-C2 = 14
-C3 = 15
-D0 = 16
-D1 = 17
-D2 = 18
-D3 = 19
-T0 = 20
-T1 = 21
-T2 = 22
-T3 = 23
-
-serpent_lt:
-	push_range 4, 17
-	movw r26, r24
-	ld A2, X+
-	ld A3, X+
-	ld A0, X+
-	ld A1, X+
-	ldi r20, 3
-	mov r0, A0
-1:	
-	lsr r0
-	ror A3
-	ror A2
-	ror A1
-	ror A0
-	dec r20
-	brne 1b
-	ld B0, X+
-	ld B1, X+
-	ld B2, X+
-	ld B3, X+
-	
-	ld C2, X+
-	ld C3, X+
-	ld C0, X+
-	ld C1, X+
-	ldi r20, 3
-	mov r0, C0
-1:	
-	lsr r0
-	ror C3
-	ror C2
-	ror C1
-	ror C0
-	dec r20
-	brne 1b
-
-	ld D0, X+
-	ld D1, X+
-	ld D2, X+
-	ld D3, X+
-	/* X1 ^= X0 ^ X2; */
-	eor B0, A0
-	eor B0, C0
-	eor B1, A1
-	eor B1, C1
-	eor B2, A2
-	eor B2, C2
-	eor B3, A3
-	eor B3, C3
-	/* X3 ^= X2 ^ (X0 << 3); */
-	mov T0, A0
-	mov T1, A1
-	mov T2, A2
-	mov T3, A3
-	ldi r24, 3
-1:
-	lsl T0
-	rol T1
-	rol T2
-	rol T3
-	dec r24
-	brne 1b
-	eor C0, B0
-	eor C0, T0
-	eor C1, B1
-	eor C1, T1
-	eor C2, B2
-	eor C2, T2
-	eor C3, B3
-	eor C3, T3
-	/*	X1 = rotl32(X1, 1); */
-	mov r0, B3
-	lsl r0
-	rol B0
-	rol B1
-	rol B2
-	rol B3
-	/* X3 = rotl32(X3, 7); */
-	mov r0, D3
-	mov D3, D2
-	mov D2, D1
-	mov D1, D0
-	mov D0, r0
-	lsr r0
-	ror D3
-	ror D2
-	ror D1
-	ror D0
-	/* 	X0 ^= X1 ^ X3; */
-	eor A0, B0
-	eor A0, D0
-	eor A1, B1
-	eor A1, D1
-	eor A2, B2
-	eor A2, D2
-	eor A3, B3
-	eor A3, D3
-	/*  X2 ^= X3 ^ (X1 << 7); */
-	mov T1, B0
-	mov T2, B1
-	mov T3, B2
-	clr T0
-	mov r0, B3
-	lsr r0
-	ror T2
-	ror T1
-	ror T0 
-	eor C0, D0
-	eor C0, T0
-	eor C1, D1
-	eor C1, T1
-	eor C2, D2
-	eor C2, T2
-	eor C3, D3
-	eor C3, T3
-	/* 	X0 = rotl32(X0, 5); */
-	ldi r24, 5
-	mov r0, A3
-1:	
-	lsl r0
-	rol A0
-	rol A1
-	rol A2
-	rol A3
-	dec r24
-	brne 1b
-	/* X2 = rotr32(X2, 10); */
-	mov r0, C0
-	mov C0, C1
-	mov C1, C2
-	mov C2, C3	
-	mov C3, r0
-	ldi r24, 2
-1:
-	lsr r0
-	ror C2
-	ror C1
-	ror C0
-	ror C3	
-	dec r24
-	brne 1b
-	
-	clr r31
-	ldi r30, D3+1
-	ldi r24, 16
-1:
-	ld r0, -Z
-	st -X, r0	
-	dec r24
-	brne 1b
-	
-	pop_range 4, 17
-	ret
-#endif
-
-T0 = 22
-T1 = 23
-T2 = 24
-T3 = 25
-TT = 21
-/* rotate the 32-bit word (4 bytes, LSB first) at X right by r20 bits in place; r20 must be 1..8 because the wrapped-in bits come from TT, an 8-bit copy of the low byte taken once before the loop; X is preserved, r20-r25 are clobbered */
-memrotr32:
-	ld T0, X+
-	ld T1, X+
-	ld T2, X+
-	ld T3, X+
-	mov TT, T0
-1:
-	lsr TT
-	ror T3
-	ror T2
-	ror T1
-	ror T0
-	dec r20
-	brne 1b
-	st -X, T3
-	st -X, T2
-	st -X, T1
-	st -X, T0
-	ret
-	
-/* rotate the 32-bit word (4 bytes, LSB first) at X left by r20 bits in place; r20 must be 1..8 because the wrapped-in bits come from TT, an 8-bit copy of the high byte taken once before the loop; X is preserved, r20-r25 are clobbered */
-memrotl32:
-	ld T0, X+
-	ld T1, X+
-	ld T2, X+
-	ld T3, X+
-	mov TT, T3
-1:
-	lsl TT
-	rol T0
-	rol T1
-	rol T2
-	rol T3
-	dec r20
-	brne 1b
-	st -X, T3
-	st -X, T2
-	st -X, T1
-	st -X, T0
-	ret
-
-/* xor the data word (4 bytes) at Z into the word at X; X and Z are both advanced by 4, r22-r24 are clobbered */
-memeor32:
-  ldi T2, 4
-1:  
-  ld T0, X		
-  ld T1, Z+
-  eor T0, T1
-  st X+, T0
-  dec T2
-  brne 1b
-  ret
-
-serpent_lt:
-	 /* X0 := X0 <<< 13 (done as 7+6: memrotl32 handles at most 8 bits per call) */
-	movw r26, r24
-	ldi r20, 7
-	rcall memrotl32
-	ldi r20, 6
-	rcall memrotl32
-	/* X2 := X2 <<< 3 */
-	adiw r26, 8
-	ldi r20, 3
-	rcall memrotl32
-	/* X1 ^= X2 */
-	movw r30, r26
-	sbiw r26, 4
-	rcall memeor32
-	/* X1 ^= X0 */
-	sbiw r26, 4
-	sbiw r30, 12
-	rcall memeor32
-	/* X3 ^= X2 */
-	movw r30, r26
-	adiw r26, 4
-	rcall memeor32
-	/* T := X0 */
-	sbiw r26, 16
-	ld r18, X+
-	ld r19, X+
-	ld r20, X+
-	ld r21, X+
-	/* T := T<<3 */
-	ldi r22, 3
-1:
-	lsl r18
-	rol r19
-	rol r20
-	rol r21
-	dec r22
-	brne 1b
-	clr r31 
-	/* X3 ^= T (Z = 18 makes memeor32 read T from r18..r21; relies on the registers being mapped into data space at address 0) */
-	adiw r26, 8
-	ldi r30, 18
-	rcall memeor32
-	/* X1 := X1<<<1 */
-	sbiw r26, 12
-	ldi r20, 1
-	rcall memrotl32
-	/* X3 := X3<<<7 */
-	adiw r26, 8
-	ldi r20, 7
-	rcall memrotl32
-	/* X0 ^= X3 */
-	movw r30, r26
-	sbiw r26, 12
-	rcall memeor32
-	/* X0 ^= X1 */
-	movw r30, r26
-	sbiw r26, 4
-	rcall memeor32
-	/* X2 ^= X3 */
-	adiw r26, 4
-	adiw r30, 4
-	rcall memeor32
-	/* T := X1<<<8 */
-	sbiw r26, 8
-	ld r19, X+
-	ld r20, X+
-	ld r21, X+
-	ld r18, X+
-	/* T := T>>>1; T&=0xffffff80, i.e. T = X1<<7 */
-	lsr r18
-	ror r21
-	ror r20
-	ror r19
-	clr r18
-	ror r18
-	clr r31
-	ldi r30, 18
-	/* X2 ^= T */
-	rcall memeor32
-	/* X0 := X0 <<< 5 */
-	sbiw r26, 12
-	ldi r20, 5
-	rcall memrotl32
-	/* X2 := X2 >>> 10 (done as 7+3; X points at offset 8 here) */
-	adiw r26, 8
-	ldi r20, 7
-	rcall memrotr32
-	ldi r20, 3
-	rcall memrotr32
-	ret
-
-serpent_inv_lt:
-	 /* X0 := X0 >>> 5 */
-	movw r26, r24
-	ldi r20, 5
-	rcall memrotr32
-	/* X2 := X2 <<< 10 */
-	adiw r26, 8
-	ldi r20, 7
-	rcall memrotl32
-	ldi r20, 3
-	rcall memrotl32
-	/* X2 ^= X3 */
-	movw r30, r26
-	adiw r30, 4
-	rcall memeor32
-	sbiw r26, 4
-	sbiw r30, 12
-	/* T := X1<<7 */
-	ld r19, Z+
-	ld r20, Z+
-	ld r21, Z+
-	ld r18, Z+
-	lsr r18
-	ror r21
-	ror r20
-	ror r19
-	clr r18
-	ror r18
-    clr r31
-    /* X2 ^= T */
-    ldi r30, 18
-    rcall memeor32
-    /* X0 ^= X1 */
-    sbiw r26, 12
-    movw r30, r26
-    adiw r30, 4
-    rcall memeor32
-    /* X0 ^= X3 */
-    sbiw r26, 4
-    adiw r30, 4
-    rcall memeor32
-    /* X1 := X1>>>1 */
-    ldi r20, 1
-	rcall memrotr32
-	/* X3 := X3>>>7 */
-	adiw r26, 8
-	ldi r20, 7
-	rcall memrotr32
-	/* X3 ^= X2 */
-	sbiw r30, 8
-	rcall memeor32
-	sbiw r26, 4
-	/* T:= X0<<3 */
-	sbiw r30, 12
-	ld r18, Z+
-	ld r19, Z+
-	ld r20, Z+
-	ld r21, Z+
-	ldi r24, 3
-1:
-	lsl r18
-	rol r19
-	rol r20
-	rol r21
-	dec r24
-	brne 1b
-	/* X3 ^= T */
-	clr r31
-	ldi r30, 18
-	rcall memeor32
-	/* X1 ^= X0 */
-	sbiw r26, 12
-	movw r30, r26
-	sbiw r30, 4
-	rcall memeor32
-	/* X1 ^= X2 */
-	movw r26, r30
-	adiw r30, 4
-	rcall memeor32
-	/* X2 := X2 >>> 3 */
-	ldi r20, 3
-	rcall memrotr32
-	/* X0 := X0 >>> 13 */
-	sbiw r26, 8
-	ldi r20, 7
-	rcall memrotr32
-	ldi r20, 6
-	rcall memrotr32
-	ret
-
-/*
-#define GOLDEN_RATIO 0x9e3779b9l
-
-static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
-	uint32_t ret;
-	ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
-	ret = rotl32(ret, 11);
-	return ret;
-}
-*/
-/*
- * param b is passed in r24:r25
- * param i is passed in r22
- * return value is returned in r22.r23.r24.r25
- */
- /* trashes:
-  *  r20-r25, r30-r31
-  */
-serpent_gen_w:
-	movw r30, r24
-	/* ^i^b[0]*/
-    ld r21, Z+
-    eor r22, r21
-    ld r23, Z+
-    ld r24, Z+
-    ld r25, Z+
-    /* ^b[3]^b[5]^b[7] (Z skips b[1] here, then each loop pass skips one word and xors the next) */
-    adiw r30, 4
-    ldi r20, 3
-1:    
-    adiw r30, 4
-    ld r21, Z+
-    eor r22, r21
-    ld r21, Z+
-    eor r23, r21
-    ld r21, Z+
-    eor r24, r21
-    ld r21, Z+
-    eor r25, r21
-	dec r20
-	brne 1b
-	/* ^0x9e3779b9l */
-	ldi r21, 0xb9
-	eor r22, r21
-	ldi r21, 0x79
-	eor r23, r21
-	ldi r21, 0x37
-	eor r24, r21
-	ldi r21, 0x9e
-	eor r25, r21
-	/* <<<11 */
-	mov r21, r25
-	mov r25, r24
-	mov r24, r23
-	mov r23, r22
-	mov r22, r21
-	mov r21, r25
-	ldi r20, 3
-1:
-	lsl r21
-	rol r22
-	rol r23
-	rol r24
-	rol r25
-	dec r20
-	brne 1b
-	ret
-
-/*
- * void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx)
- */
-/*
- * param key     is passed in r24:r25
- * param keysize is passed in r22:r23
- * param ctx     is passed in r20:r21
- */
-.global serpent_init
-serpent_init:
-    stack_alloc 32
-    adiw r30, 1
-	push_ r30, r31
-    movw r26, r22
-    adiw r26, 7
-    tst r27
-    breq 1f
-	ldi r26, 32
-	rjmp 2f
-1:
-	lsr r26
-	lsr r26
-	lsr r26
-2:	
-	mov r22, r26
-	bst r22, 5 /* store in T if we have to do the "append 1 thing"*/
-	ldi r27, 32
-3:	/* set buffer to zero */
-	st Z+, r1
-	dec r27
-	brne 3b
-	
-	movw r26, r24 /* X points to the key */
-	sbiw r30, 32
-	tst r22
-	breq 5f /* if keylength_b==0 */
-4:	/* copy keybytes to buffer */
-	ld r19, X+
-	st Z+, r19
-	dec r22
-	brne 4b
-5:
-	brts 7f /* if keylength_b == 256 */
-	ldi r18, 0x01
-	andi r22, 0x07
-	brne 6f
-	st Z, r18
-	rjmp 7f
-6:	/* shift the one to the right position; NOTE(review): r22 is already 0 when the andi after label 5 runs (counted down by the copy loop, or 0 on entry), so this branch looks unreachable -- confirm */
-	lsl r18
-	dec r22
-	brne 6b
-	or r18, r19
-	st -Z, r18
-7: /* post "appending 1 thing" buffer is ready for subkey generation */
-	movw r26, r20  /* X points to the context */
-	
-	pop_ r19, r18 /* r18:r19 points to the buffer */
-	push r16
-	clr r16
-8:
-	movw r24, r18
-	mov  r22, r16
-	rcall serpent_gen_w
-	movw r30, r18
-	ldi r20, 7*4
-1: /* the memmove */
-	ldd r0, Z+4
-	st Z+, r0
-	dec r20
-	brne 1b
-  /* store new word in buffer and context */	
-	st Z+, r22
-	st Z+, r23
-	st Z+, r24
-	st Z+, r25
-	st X+, r22
-	st X+, r23
-	st X+, r24
-	st X+, r25
-	
-	inc r16
-	cpi r16, 132
-	brne 8b	
-	
-	push_ r28, r29
-	movw r28, r26
-	subi r28, lo8(132*4)
-	sbci r29, hi8(132*4)
-	ldi r16, 33
-2:
-	movw r24, r28
-	adiw r28, 16
-	ldi r22, 2
-	add r22, r16
-	rcall sbox128
-	dec r16
-	brne 2b
-	pop_ r29, r28, r16
-	stack_free 32
-	ret
-
-/*
- * void serpent_enc(void* buffer, const serpent_ctx_t* ctx)
- */
-/*
- * param buffer is passed in r24:r25
- * param ctx    is passed in r22:r23
- */
-.global serpent_enc
-serpent_enc:
-
-	push_ r12, r13, r14, r15, r16 
-	clr r16
-	movw r14, r24
-	movw r12, r22
-1:
-	movw r24, r14
-	movw r22, r12
-	ldi r20, 16
-	add r12, r20
-	adc r13, r1
-	clr r21
-	rcall memxor
-	movw r24, r14
-	mov r22, r16
-	rcall sbox128
-	movw r24, r14
-	rcall serpent_lt
-	
-	inc r16
-	cpi r16, 31
-	brne 1b
-	
-	movw r24, r14
-	movw r22, r12
-	ldi r20, 16
-	add r12, r20
-	adc r13, r1
-	clr r21
-	rcall memxor
-	movw r24, r14
-	mov r22, r16
-	rcall sbox128
-	
-	inc r16
-	movw r24, r14
-	movw r22, r12
-	ldi r20, 16
-	clr r21
-	pop_ r16, r15, r14, r13, r12
-	rjmp memxor
-
-/*
- * void serpent_dec(void* buffer, const serpent_ctx_t* ctx)
- */
-/*
- * param buffer is passed in r24:r25
- * param ctx    is passed in r22:r23
- */
-.global serpent_dec
-serpent_dec:
-	push_ r12, r13, r14, r15, r16 
-	movw r14, r24
-//	ldi r16, lo8(32*16)
-//	add r22, r16
-	ldi r16, hi8(32*16)
-	add r23, r16
-	movw r12, r22
-	ldi r20, 16
-	clr r21
-	rcall memxor
-	
-	movw r24, r14
-	ldi r22, 31
-	call inv_sbox128
-	
-	movw r24, r14
-	ldi r20, 16
-	sub r12, r20
-	sbc r13, r1
-	movw r22, r12
-	clr r21
-	rcall memxor
-	ldi r16, 31
-1:
-	dec r16
-	movw r24, r14
-	rcall serpent_inv_lt
-	movw r24, r14
-	mov r22, r16
-	rcall inv_sbox128
-	movw r24, r14
-	ldi r20, 16
-	sub r12, r20
-	sbc r13, r1
-	movw r22, r12
-	clr r21
-	rcall memxor
-	
-	tst r16
-	brne 1b
-	pop_ r16, r15, r14, r13, r12
-	ret	
-	
-	
-	
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/serpent-sboxes-bitslice-asm.S b/serpent-sboxes-bitslice-asm.S
deleted file mode 100644
index 4e0d7db..0000000
--- a/serpent-sboxes-bitslice-asm.S
+++ /dev/null
@@ -1,854 +0,0 @@
-/* serpent-sboxes-bitslice-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* serpent-sboxes.c
- * a bitsliced implementation of the serpent sboxes
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- */
-
-#include "avr-asm-macros.S"
-IN0  = 22
-IN1  = 23
-IN2  = 24
-IN3  = 25
-OUT0 = 18
-OUT1 = 19
-OUT2 = 20
-OUT3 = 21
-T00 =  2
-T00 =  3
-T01 =  4
-T02 =  5
-T03 =  6
-T04 =  7
-T05 =  8
-T06 =  9
-T07 = 10
-T08 = 11
-T09 = 12
-T10 = 13
-T11 = 14
-T12 = 15
-T13 = 16
-T14 = 17
-T15 = 26
-T16 = 27
-T17 =  0
-
-/* S0:   3  8 15  1 10  6  5 11 14 13  4  2  7  0  9 12 */
-
-/* depth = 5,7,4,2, Total gates=18 */
-sb0:
-    mov T00, IN1
-    eor T00, IN2
-    mov T01, IN0
-    or  T01, IN3
-    mov T02, IN0
-    eor T02, IN1
-    mov OUT3, T01
-    eor OUT3, T00
-    mov T04, IN2
-    or  T04, OUT3
-    mov T05, IN0
-    eor T05, IN3
-    mov T06, IN1
-    or  T06, IN2
-    mov T07, IN3
-    and T07, T04
-    mov T08, T02
-    and T08, T06
-    mov OUT2, T08
-    eor OUT2, T07
-    mov T10, T08
-    and T10, OUT2
-    mov T11, IN2
-    eor T11, IN3
-    mov T12, T06
-    eor T12, T10
-    mov T13, IN1
-    and T13, T05
-    mov T14, T05
-    eor T14, T12
-	mov OUT0, T14
-	com OUT0
-	mov T16, OUT0
-    eor T16, T13
-    mov OUT1, T11
-    eor OUT1, T16
-	ret
-
-	
-/* InvS0:  13  3 11  0 10  6  5 12  1 14  4  7 15  9  8  2 */
-
-/* depth = 8,4,3,6, Total gates=19 */
-sb0_inv:
-    mov T00, IN2
-    eor T00, IN3
-    mov T01, IN0
-    or  T01, IN1
-    mov T02, IN1
-    or  T02, IN2
-    mov T03, IN2
-    and T03, T00
-    mov T04, T01
-    eor T04, T00
-    mov T05, IN0
-    or  T05, T03
-	mov OUT2, T04
-	com OUT2
-    mov T07, IN1
-    eor T07, IN3
-    mov T08, T02
-    and T08, T07
-    mov T09, IN3
-    or  T09, OUT2
-    mov OUT1, T08
-    eor OUT1, T05
-    mov T11, IN0
-    or  T11, T04
-    mov T12, OUT1
-    eor T12, T11
-    mov T13, T02
-    eor T13, T09
-    mov T14, IN0
-    eor T14, IN2
-    mov OUT3, T13
-    eor OUT3, T12
-    mov T16, T04
-    and T16, T12
-    mov T17, T13
-    or  T17, T16
-    mov OUT0, T14
-    eor OUT0, T17
-	ret
-
-
-/* S1:  15 12  2  7  9  0  5 10  1 11 14  8  6 13  3  4 */
-
-/* depth = 10,7,3,5, Total gates=18 */
-sb1:
-    mov T00, IN0
-    or  T00, IN3
-    mov T01, IN2
-    eor T01, IN3
-	mov T02, IN1
-	com T02
-    mov T03, IN0
-    eor T03, IN2
-    mov T04, IN0
-    or  T04, T02
-    mov T05, IN3
-    and T05, T03
-    mov T06, T00
-    and T06, T01
-    mov T07, IN1
-    or  T07, T05
-    mov OUT2, T01
-    eor OUT2, T04
-    mov T09, T06
-    eor T09, T07
-    mov T10, T00
-    eor T10, T09
-    mov T11, OUT2
-    eor T11, T10
-    mov T12, IN1
-    and T12, IN3
-	mov OUT3, T09
-	com OUT3
-    mov OUT1, T12
-    eor OUT1, T11
-    mov T15, T09
-    or  T15, OUT1
-    mov T16, T04
-    and T16, T15
-    mov OUT0, IN2
-    eor OUT0, T16
-	ret
-
-
-/* InvS1:   5  8  2 14 15  6 12  3 11  4  7  9  1 13 10  0 */
-
-/* depth = 7,4,5,3, Total gates=18 */
-sb1_inv:
-    mov T00, IN0
-    eor T00, IN1
-    mov T01, IN1
-    or  T01, IN3
-    mov T02, IN0
-    and T02, IN2
-    mov T03, IN2
-    eor T03, T01
-    mov T04, IN0
-    or  T04, T03
-    mov T05, T00
-    and T05, T04
-    mov T06, IN3
-    or  T06, T02
-    mov T07, IN1
-    eor T07, T05
-    mov T08, T06
-    eor T08, T05
-    mov T09, T03
-    or  T09, T02
-    mov T10, IN3
-    and T10, T07
-    mov OUT2, T08
-    com OUT2
-	mov OUT1, T09
-    eor OUT1, T10
-    mov T13, IN0
-    or  T13, OUT2
-    mov T14, T05
-    eor T14, OUT1
-    mov OUT3, T00
-    eor OUT3, T03
-    mov T16, IN2
-    eor T16, T14
-    mov OUT0, T13
-    eor OUT0, T16
-	ret
-
-/* S2:   8  6  7  9  3 12 10 15 13  1 14  4  0 11  5  2 */
-
-/* depth = 3,8,11,7, Total gates=16 */
-sb2:
-    mov T00, IN0
-    or  T00, IN2
-    mov T01, IN0
-    eor T01, IN1
-    mov T02, IN3
-    eor T02, T00
-    mov OUT0, T01
-    eor OUT0, T02
-    mov T04, IN2
-    eor T04, OUT0
-    mov T05, IN1
-    eor T05, T04
-    mov T06, IN1
-    or  T06, T04
-    mov T07, T00
-    and T07, T05
-    mov T08, T02
-    eor T08, T06
-    mov T09, T01
-    or  T09, T08
-    mov OUT1, T09
-    eor OUT1, T07
-    mov T11, IN0
-    or  T11, IN3
-    mov T12, T08
-    eor T12, OUT1
-    mov T13, IN1
-    eor T13, T12
-	mov OUT3, T08
-	com OUT3
-    mov OUT2, T11
-    eor OUT2, T13
-	ret
-
-/* InvS2:  12  9 15  4 11 14  1  2  0  3  6 13  5  8 10  7 */
-
-/* depth = 3,6,8,3, Total gates=18 */
-sb2_inv:
-    mov T00, IN0
-    eor T00, IN3
-    mov T01, IN2
-    eor T01, IN3
-    mov T02, IN0
-    and T02, IN2
-    mov T03, IN1
-    or  T03, T01
-    mov OUT0, T00
-    eor OUT0, T03
-    mov T05, IN0
-    or  T05, IN2
-    mov T06, IN3
-    or  T06, OUT0
-	mov T07, IN3
-	com T07
-    mov T08, IN1
-    and T08, T05
-    mov T09, T07
-    or  T09, T02
-    mov T10, IN1
-    and T10, T06
-    mov T11, T05
-    and T11, T01
-    mov OUT3, T08
-    eor OUT3, T09
-    mov OUT1, T11
-    eor OUT1, T10
-    mov T14, IN2
-    and T14, OUT3
-    mov T15, OUT0
-    eor T15, OUT1
-    mov T16, T09
-    eor T16, T14
-    mov OUT2, T15
-    eor OUT2, T16
-	ret
-
-/* S3:   0 15 11  8 12  9  6  3 13  1  2  4 10  7  5 14 */
-
-/* depth = 8,3,5,5, Total gates=18 */
-sb3:
-    mov T00, IN0
-    eor T00, IN2
-    mov T01, IN0
-    or  T01, IN3
-    mov T02, IN0
-    and T02, IN3
-    mov T03, T00
-    and T03, T01
-    mov T04, IN1
-    or  T04, T02
-    mov T05, IN0
-    and T05, IN1
-    mov T06, IN3
-    eor T06, T03
-    mov T07, IN2
-    or  T07, T05
-    mov T08, IN1
-    eor T08, T06
-    mov T09, IN3
-    and T09, T04
-    mov T10, T01
-    eor T10, T09
-    mov OUT3, T07
-    eor OUT3, T08
-    mov T12, IN3
-    or  T12, OUT3
-    mov T13, IN0
-    or  T13, T06
-    mov T14, IN1
-    and T14, T12
-    mov OUT2, T07
-    eor OUT2, T10
-    mov OUT0, T13
-    eor OUT0, T14
-    mov OUT1, T04
-    eor OUT1, T03
-	ret
-
-/* InvS3:   0  9 10  7 11 14  6 13  3  5 12  2  4  8 15  1 */
-
-/* depth = 3,6,4,4, Total gates=17 */
-sb3_inv:
-    mov T00, IN2
-    or  T00, IN3
-    mov T01, IN0
-    or  T01, IN3
-    mov T02, IN2
-    eor T02, T01
-    mov T03, IN1
-    eor T03, T01
-    mov T04, IN0
-    eor T04, IN3
-    mov T05, T03
-    and T05, T02
-    mov T06, IN1
-    and T06, T00
-    mov OUT2, T04
-    eor OUT2, T05
-    mov T08, IN0
-    eor T08, T02
-    mov OUT0, T06
-    eor OUT0, T02
-    mov T10, OUT0
-    or  T10, T04
-    mov T11, T08
-    and T11, T10
-    mov T12, IN0
-    and T12, OUT2
-    mov T13, T00
-    eor T13, T04
-    mov OUT1, IN1
-    eor OUT1, T11
-    mov T15, IN1
-    or  T15, T12
-    mov OUT3, T13
-    eor OUT3, T15
-	ret
-
-/* S4:   1 15  8  3 12  0 11  6  2  5  4 10  9 14  7 13 */
-
-/* depth = 6,7,5,3, Total gates=19 */
-sb4:
-    mov T00, IN0
-    or  T00, IN1
-    mov T01, IN1
-    or  T01, IN2
-    mov T02, IN0
-    eor T02, T01
-    mov T03, IN1
-    eor T03, IN3
-    mov T04, IN3
-    or  T04, T02
-    mov T05, IN3
-    and T05, T00
-    mov OUT3, T02
-    eor OUT3, T05
-    mov T07, OUT3
-    and T07, T03
-    mov T08, T03
-    and T08, T04
-    mov T09, IN2
-    eor T09, T05
-    mov T10, IN1
-    and T10, IN2
-    mov T11, T03
-    eor T11, T07
-    mov T12, T10
-    or  T12, T02
-    mov T13, T09
-    eor T13, T08
-    mov T14, IN0
-    and T14, T04
-    mov T15, T10
-    or  T15, T11
-    mov OUT2, T12
-    eor OUT2, T07
-    mov OUT1, T14
-    eor OUT1, T15
-	mov OUT0, T13
-	com OUT0
-	ret
-
-/* InvS4:   5  0  8  3 10  9  7 14  2 12 11  6  4 15 13  1 */
-
-/* depth = 6,4,7,3, Total gates=17 */
-sb4_inv:
-    mov T00, IN1
-    or  T00, IN3
-    mov T01, IN2
-    or  T01, IN3
-    mov T02, IN0
-    and T02, T00
-    mov T03, IN1
-    eor T03, T01
-    mov T04, IN2
-    eor T04, IN3
-	mov T05, T02
-	com T05
-    mov T06, IN0
-    and T06, T03
-    mov OUT1, T04
-    eor OUT1, T06
-    mov T08, OUT1
-    or  T08, T05
-    mov T09, IN0
-    eor T09, T06
-    mov T10, T00
-    eor T10, T08
-    mov T11, IN3
-    eor T11, T03
-    mov T12, IN2
-    or  T12, T09
-    mov OUT3, T02
-    eor OUT3, T11
-    mov T14, IN0
-    eor T14, T03
-    mov OUT2, T10
-    eor OUT2, T12
-    mov OUT0, T14
-    eor OUT0, T08
-	ret
-
-/* S5:  15  5  2 11  4 10  9 12  0  3 14  8 13  6  7  1 */
-
-/* depth = 4,6,8,6, Total gates=17 */
-sb5:
-    mov T00, IN1
-    eor T00, IN3
-    mov T01, IN1
-    or  T01, IN3
-    mov T02, IN0
-    and T02, T00
-    mov T03, IN2
-    eor T03, T01
-    mov T04, T02
-    eor T04, T03
-	mov OUT0, T04
-	com OUT0
-    mov T06, IN0
-    eor T06, T00
-    mov T07, IN3
-    or  T07, OUT0
-    mov T08, IN1
-    or  T08, T04
-    mov T09, IN3
-    eor T09, T07
-    mov T10, IN1
-    or  T10, T06
-    mov T11, T02
-    or  T11, OUT0
-    mov T12, T06
-    or  T12, T09
-    mov T13, T00
-    eor T13, T10
-    mov OUT2, T08
-    eor OUT2, T12
-    mov OUT1, T06
-    eor OUT1, T07
-    mov OUT3, T11
-    eor OUT3, T13
-	ret
-
-/* InvS5:   8 15  2  9  4  1 13 14 11  6  5  3  7 12 10  0 */
-
-/* depth = 4,6,9,7, Total gates=17 */
-sb5_inv:
-    mov T00, IN0
-    and T00, IN3
-    mov T01, IN2
-    eor T01, T00
-    mov T02, IN0
-    eor T02, IN3
-    mov T03, IN1
-    and T03, T01
-    mov T04, IN0
-    and T04, IN2
-    mov OUT0, T02
-    eor OUT0, T03
-    mov T06, IN0
-    and T06, OUT0
-    mov T07, T00
-    eor T07, OUT0
-    mov T08, IN1
-    or  T08, T04
-	mov T09, IN1
-	com T09
-    mov OUT1, T07
-    eor OUT1, T08
-    mov T11, T09
-    or  T11, T06
-    mov T12, OUT0
-    or  T12, OUT1
-    mov OUT3, T01
-    eor OUT3, T11
-    mov T14, T01
-    eor T14, T12
-    mov T15, IN1
-    eor T15, IN3
-    mov OUT2, T15
-    eor OUT2, T14
-	ret
-
-/* S6:   7  2 12  5  8  4  6 11 14  9  1 15 13  3 10  0 */
-
-/* depth = 8,3,6,3, Total gates=19 */
-sb6:
-    mov T00, IN0
-    and T00, IN3
-    mov T01, IN1
-    eor T01, IN2
-    mov T02, IN0
-    eor T02, IN3
-    mov T03, T00
-    eor T03, T01
-    mov T04, IN1
-    or  T04, IN2
-	mov OUT1, T03
-	com OUT1
-    mov T06, T02
-    and T06, T04
-    mov T07, IN1
-    and T07, OUT1
-    mov T08, IN0
-    or  T08, IN2
-    mov T09, T06
-    eor T09, T07
-    mov T10, IN1
-    or  T10, IN3
-    mov T11, IN2
-    eor T11, T10
-    mov T12, T08
-    eor T12, T09
-	mov OUT2, T12
-	com OUT2
-    mov T14, OUT1
-    and T14, T02
-    mov OUT3, T11
-    eor OUT3, T06
-    mov T16, IN0
-    eor T16, IN1
-    mov T17, OUT2
-    eor T17, T14
-    mov OUT0, T16
-    eor OUT0, T17
-	ret
-
-/* InvS6:  15 10  1 13  5  3  6  0  4  9 14  7  2 12  8 11 */
-
-/* depth = 5,3,8,6, Total gates=19 */
-sb6_inv:
-    mov T00, IN0
-    eor T00, IN2
-	mov T01, IN2
-	com T01
-    mov T02, IN1
-    and T02, T00
-    mov T03, IN1
-    or  T03, T01
-    mov T04, IN3
-    or  T04, T02
-    mov T05, IN1
-    eor T05, IN3
-    mov T06, IN0
-    and T06, T03
-    mov T07, IN0
-    or  T07, T01
-    mov T08, T06
-    eor T08, T04
-    mov OUT1, T05
-    eor OUT1, T07
-	mov OUT0, T08
-	com OUT0
-    mov T11, IN1
-    and T11, OUT0
-    mov T12, T00
-    and T12, T04
-    mov T13, T00
-    eor T13, T11
-    mov T14, T06
-    eor T14, T12
-    mov T15, IN3
-    or  T15, T01
-    mov T16, IN0
-    eor T16, OUT1
-    mov OUT3, T16
-    eor OUT3, T14
-    mov OUT2, T15
-    eor OUT2, T13
-	ret
-
-/* S7:   1 13 15  0 14  8  2 11  7  4 12 10  9  3  5  6 */
-
-/* depth = 10,7,10,4, Total gates=19 */
-sb7:
-    mov T00, IN0
-    and T00, IN2
-	mov T01, IN3
-	com T01
-    mov T02, IN0
-    and T02, T01
-    mov T03, IN1
-    or  T03, T00
-    mov T04, IN0
-    and T04, IN1
-    mov T05, IN2
-    eor T05, T03
-    mov OUT3, T02
-    eor OUT3, T05
-    mov T07, IN2
-    or  T07, OUT3
-    mov T08, IN3
-    or  T08, T04
-    mov T09, IN0
-    eor T09, T07
-    mov T10, T03
-    and T10, OUT3
-    mov OUT1, T08
-    eor OUT1, T09
-    mov T12, IN1
-    eor T12, OUT1
-    mov T13, T00
-    eor T13, OUT1
-    mov T14, IN2
-    eor T14, T04
-    mov T15, T10
-    or  T15, T12
-    mov T16, T01
-    or  T16, T13
-    mov OUT0, T14
-    eor OUT0, T16
-    mov OUT2, IN0
-    eor OUT2, T15
-	ret
-
-/* InvS7:   3  0  6 13  9 14 15  8  5 12 11  7 10  1  4  2 */
-
-/* depth = 9,7,3,3, Total gates=18 */
-sb7_inv:
-    mov T00, IN0
-    and T00, IN1
-    mov T01, IN0
-    or  T01, IN1
-    mov T02, IN2
-    or  T02, T00
-    mov T03, IN3
-    and T03, T01
-    mov OUT3, T02
-    eor OUT3, T03
-    mov T05, IN1
-    eor T05, T03
-    mov T06, IN3
-    eor T06, OUT3
-    mov T07, T06
-    com T07
-    mov T08, T05
-    or  T08, T07
-    mov T09, IN1
-    eor T09, IN3
-    mov T10, IN0
-    or  T10, IN3
-    mov OUT1, IN0
-    eor OUT1, T08
-    mov T12, IN2
-    eor T12, T05
-    mov T13, IN2
-    and T13, T10
-    mov T14, IN3
-    or  T14, OUT1
-    mov T15, T00
-    or  T15, T09
-    mov OUT0, T12
-    eor OUT0, T14
-    mov OUT2, T13
-    eor OUT2, T15
-	ret
-
-sf_tab:
-.word sb0, sb1, sb2, sb3
-.word sb4, sb5, sb6, sb7
-
-sinvf_tab:
-.word sb0_inv, sb1_inv, sb2_inv, sb3_inv
-.word sb4_inv, sb5_inv, sb6_inv, sb7_inv
-
-/*
-.byte pm_lo8(sb0), pm_hi8(sb0)
-.byte pm_lo8(sb1), pm_hi8(sb1)
-.byte pm_lo8(sb2), pm_hi8(sb2)
-.byte pm_lo8(sb3), pm_hi8(sb3)
-.byte pm_lo8(sb4), pm_hi8(sb4)
-.byte pm_lo8(sb5), pm_hi8(sb5)
-.byte pm_lo8(sb6), pm_hi8(sb6)
-.byte pm_lo8(sb7), pm_hi8(sb7)
-
-
-sinvf_tab:
-.byte pm_lo8(sb0_inv), pm_hi8(sb0_inv)
-.byte pm_lo8(sb1_inv), pm_hi8(sb1_inv)
-.byte pm_lo8(sb2_inv), pm_hi8(sb2_inv)
-.byte pm_lo8(sb3_inv), pm_hi8(sb3_inv)
-.byte pm_lo8(sb4_inv), pm_hi8(sb4_inv)
-.byte pm_lo8(sb5_inv), pm_hi8(sb5_inv)
-.byte pm_lo8(sb6_inv), pm_hi8(sb6_inv)
-.byte pm_lo8(sb7_inv), pm_hi8(sb7_inv)
-*/
-/*
-void sbox128(void * w, uint8_t box){
-	uint8_t i, buffer[16];
-	box &= 0x7;
-	
-	sb_fpt fp;
-	fp = (sb_fpt)pgm_read_word(&(sf_tab[box]));
-	for(i=0; i<4; ++i){
-		fp(buffer+i, (uint8_t*)w+i);
-	}
-	memcpy(w, buffer, 16);
-}
-*/
-.global sbox128
-sbox128:
-	ldi r30, lo8(sf_tab)
-	ldi r31, hi8(sf_tab)
-1:
-;	clr r1
-	andi r22, 0x07
-	lsl r22
-	add r30, r22
-	adc r31, r1
-	lpm r26, Z+
-	lpm r27, Z
-	lsr r27
-	ror r26
-	push r28
-	push r29
-	movw r30, r26
-	movw r28, r24
-	push_range 2, 17
-	ldd IN0, Y+0
-	ldd IN1, Y+4
-	ldd IN2, Y+8
-	ldd IN3, Y+12
-	icall 
-	std Y+0, OUT0
-	std Y+4, OUT1
-	std Y+8, OUT2
-	std Y+12, OUT3
-	ldd IN0, Y+0+1
-	ldd IN1, Y+4+1
-	ldd IN2, Y+8+1
-	ldd IN3, Y+12+1
-	icall
-	std Y+0+1, OUT0
-	std Y+4+1, OUT1
-	std Y+8+1, OUT2
-	std Y+12+1, OUT3
-	ldd IN0, Y+0+2
-	ldd IN1, Y+4+2
-	ldd IN2, Y+8+2
-	ldd IN3, Y+12+2
-	icall
-	std Y+0+2, OUT0
-	std Y+4+2, OUT1
-	std Y+8+2, OUT2
-	std Y+12+2, OUT3
-	ldd IN0, Y+0+3
-	ldd IN1, Y+4+3
-	ldd IN2, Y+8+3
-	ldd IN3, Y+12+3
-	icall
-	std Y+0+3, OUT0
-	std Y+4+3, OUT1
-	std Y+8+3, OUT2
-	std Y+12+3, OUT3
-	pop_range 2, 17
-	pop r29
-	pop r28
-	ret
-	
-.global	inv_sbox128
-inv_sbox128:
-	ldi r30, lo8(sinvf_tab)
-	ldi r31, hi8(sinvf_tab)
-	rjmp 1b	
-/*	
-void inv_sbox128(void * w, uint8_t box){
-	uint8_t i, buffer[16];
-	box &= 0x7;
-	
-	sb_fpt fp;
-	fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box]));
-	for(i=0; i<4; ++i){
-		fp(buffer+i, (uint8_t*)w+i);
-	}
-	memcpy(w, buffer, 16);
-}
-*/
-
-
-
-
-
-
-
diff --git a/serpent-sboxes-bitslice.c b/serpent-sboxes-bitslice.c
deleted file mode 100644
index 96f9c68..0000000
--- a/serpent-sboxes-bitslice.c
+++ /dev/null
@@ -1,479 +0,0 @@
-/* serpent-sboxes-bitslice.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* serpent-sboxes-bitslice.c
- * a bitsliced implementation of the serpent sboxes
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <avr/pgmspace.h>
-#include "serpent-sboxes.h"
-
-
-
-
-/* S0:   3  8 15  1 10  6  5 11 14 13  4  2  7  0  9 12 */
-
-/* depth = 5,7,4,2, Total gates=18 */
-/*
- * Bitsliced Serpent S-box S0.
- * Reads the four input planes in[0], in[4], in[8], in[12] and writes the four
- * output planes out[0], out[4], out[8], out[12], using only bitwise
- * operations on whole bytes.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static
-void sb0(uint8_t* out, const uint8_t* in){
-// (a,b,c,d,w,x,y,z) 
-	uint8_t t01, t02, t03, t05, t06, t07, t08, t09, t11, t12, t13, t14, t15, t17;
-	t01          = in[4*1]  ^ in[4*2]; 
-	t02          = in[4*0]  | in[4*3]; 
-	t03          = in[4*0]  ^ in[4*1]; 
-	out[4*3]   = t02        ^ t01; 
-	t05          = in[4*2]  | out[4*3]; 
-	t06          = in[4*0]  ^ in[4*3]; 
-	t07          = in[4*1]  | in[4*2]; 
-	t08          = in[4*3]  & t05; 
-	t09          = t03        & t07; 
-	out[4*2]   = t09        ^ t08; 
-	t11          = t09        & out[4*2]; 
-	t12          = in[4*2]  ^ in[4*3]; 
-	t13          = t07        ^ t11; 
-	t14          = in[4*1]  & t06; 
-	t15          = t06        ^ t13; 
-	out[4*0]   =     ~ t15; 
-	t17          = out[4*0] ^ t14; 
-	out[4*1]   = t12        ^ t17; 
-}
-
-/* InvS0:  13  3 11  0 10  6  5 12  1 14  4  7 15  9  8  2 */
-
-/* depth = 8,4,3,6, Total gates=19 */
-/*
- * Bitsliced inverse Serpent S-box InvS0.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static
-void sb0_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t12, t13, t14, t15, t17, t18, t01;
-	t01          = in[4*2] ^ in[4*3];
-	t02          = in[4*0] | in[4*1];
-	t03          = in[4*1] | in[4*2];
-	t04          = in[4*2] & t01;
-	t05          = t02       ^ t01;
-	t06          = in[4*0] | t04;
-	out[4*2]   =     ~ t05;
-	t08          = in[4*1]   ^ in[4*3];
-	t09          = t03       & t08;
-	t10          = in[4*3]   | out[4*2];
-	out[4*1]   = t09       ^ t06;
-	t12          = in[4*0]   | t05;
-	t13          = out[4*1]   ^ t12;
-	t14          = t03       ^ t10;
-	t15          = in[4*0]   ^ in[4*2];
-	out[4*3]   = t14       ^ t13;
-	t17          = t05       & t13;
-	t18          = t14       | t17;
-	out[4*0]   = t15       ^ t18; 
-}
-
-/* S1:  15 12  2  7  9  0  5 10  1 11 14  8  6 13  3  4 */
-
-/* depth = 10,7,3,5, Total gates=18 */
-/*
- * Bitsliced Serpent S-box S1.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static 
-void sb1(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t08, t10, t11, t12, t13, t16, t17, t01;
-	t01          = in[4*0]   | in[4*3];
-	t02          = in[4*2]   ^ in[4*3];
-	t03          =     ~ in[4*1];
-	t04          = in[4*0]   ^ in[4*2];
-	t05          = in[4*0]   | t03;
-	t06          = in[4*3]   & t04;
-	t07          = t01       & t02;
-	t08          = in[4*1]   | t06;
-	out[4*2]   = t02       ^ t05;
-	t10          = t07       ^ t08;
-	t11          = t01       ^ t10;
-	t12          = out[4*2]   ^ t11;
-	t13          = in[4*1]   & in[4*3];
-	out[4*3]   =     ~ t10;
-	out[4*1]   = t13       ^ t12;
-	t16          = t10       | out[4*1];
-	t17          = t05       & t16;
-	out[4*0]   = in[4*2]   ^ t17; 
-}
-
-/* InvS1:   5  8  2 14 15  6 12  3 11  4  7  9  1 13 10  0 */
-
-/* depth = 7,4,5,3, Total gates=18 */
-/*
- * Bitsliced inverse Serpent S-box InvS1.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb1_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t14, t15, t17, t01;
-	t01          = in[4*0]   ^ in[4*1];
-	t02          = in[4*1]   | in[4*3];
-	t03          = in[4*0]   & in[4*2];
-	t04          = in[4*2]   ^ t02;
-	t05          = in[4*0]   | t04;
-	t06          = t01       & t05;
-	t07          = in[4*3]   | t03;
-	t08          = in[4*1]   ^ t06;
-	t09          = t07       ^ t06;
-	t10          = t04       | t03;
-	t11          = in[4*3]   & t08;
-	out[4*2]   =     ~ t09;
-	out[4*1]   = t10       ^ t11;
-	t14          = in[4*0]   | out[4*2];
-	t15          = t06       ^ out[4*1];
-	out[4*3]   = t01       ^ t04;
-	t17          = in[4*2]   ^ t15;
-	out[4*0]   = t14       ^ t17; 
-}
-
-/* S2:   8  6  7  9  3 12 10 15 13  1 14  4  0 11  5  2 */
-
-/* depth = 3,8,11,7, Total gates=16 */
-/*
- * Bitsliced Serpent S-box S2.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb2(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t05, t06, t07, t08, t09, t10, t12, t13, t14, t01;
-	t01          = in[4*0]   | in[4*2];
-	t02          = in[4*0]   ^ in[4*1];
-	t03          = in[4*3]   ^ t01;
-	out[4*0]   = t02       ^ t03;
-	t05          = in[4*2]   ^ out[4*0];
-	t06          = in[4*1]   ^ t05;
-	t07          = in[4*1]   | t05;
-	t08          = t01       & t06;
-	t09          = t03       ^ t07;
-	t10          = t02       | t09;
-	out[4*1]   = t10       ^ t08;
-	t12          = in[4*0]   | in[4*3];
-	t13          = t09       ^ out[4*1];
-	t14          = in[4*1]   ^ t13;
-	out[4*3]   =     ~ t09;
-	out[4*2]   = t12       ^ t14; 
-}
-
-/* InvS2:  12  9 15  4 11 14  1  2  0  3  6 13  5  8 10  7 */
-
-/* depth = 3,6,8,3, Total gates=18 */
-/*
- * Bitsliced inverse Serpent S-box InvS2.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb2_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t12, t15, t16, t17, t01;
-	t01          = in[4*0]   ^ in[4*3];
-	t02          = in[4*2]   ^ in[4*3];
-	t03          = in[4*0]   & in[4*2];
-	t04          = in[4*1]   | t02;
-	out[4*0]   = t01       ^ t04;
-	t06          = in[4*0]   | in[4*2];
-	t07          = in[4*3]   | out[4*0];
-	t08          =     ~ in[4*3];
-	t09          = in[4*1]   & t06;
-	t10          = t08       | t03;
-	t11          = in[4*1]   & t07;
-	t12          = t06       & t02;
-	out[4*3]   = t09       ^ t10;
-	out[4*1]   = t12       ^ t11;
-	t15          = in[4*2]   & out[4*3];
-	t16          = out[4*0]   ^ out[4*1];
-	t17          = t10       ^ t15;
-	out[4*2]   = t16       ^ t17; 
-}
-
-/* S3:   0 15 11  8 12  9  6  3 13  1  2  4 10  7  5 14 */
-
-/* depth = 8,3,5,5, Total gates=18 */
-/*
- * Bitsliced Serpent S-box S3.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb3(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t13, t14, t15, t01;
-	t01          = in[4*0]   ^ in[4*2];
-	t02          = in[4*0]   | in[4*3];
-	t03          = in[4*0]   & in[4*3];
-	t04          = t01       & t02;
-	t05          = in[4*1]   | t03;
-	t06          = in[4*0]   & in[4*1];
-	t07          = in[4*3]   ^ t04;
-	t08          = in[4*2]   | t06;
-	t09          = in[4*1]   ^ t07;
-	t10          = in[4*3]   & t05;
-	t11          = t02       ^ t10;
-	out[4*3]   = t08       ^ t09;
-	t13          = in[4*3]   | out[4*3];
-	t14          = in[4*0]   | t07;
-	t15          = in[4*1]   & t13;
-	out[4*2]   = t08       ^ t11;
-	out[4*0]   = t14       ^ t15;
-	out[4*1]   = t05       ^ t04; 
-}
-
-/* InvS3:   0  9 10  7 11 14  6 13  3  5 12  2  4  8 15  1 */
-
-/* depth = 3,6,4,4, Total gates=17 */
-/*
- * Bitsliced inverse Serpent S-box InvS3.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb3_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t09, t11, t12, t13, t14, t16, t01;
-	t01          = in[4*2]   | in[4*3];
-	t02          = in[4*0]   | in[4*3];
-	t03          = in[4*2]   ^ t02;
-	t04          = in[4*1]   ^ t02;
-	t05          = in[4*0]   ^ in[4*3];
-	t06          = t04       & t03;
-	t07          = in[4*1]   & t01;
-	out[4*2]   = t05       ^ t06;
-	t09          = in[4*0]   ^ t03;
-	out[4*0]   = t07       ^ t03;
-	t11          = out[4*0]   | t05;
-	t12          = t09       & t11;
-	t13          = in[4*0]   & out[4*2];
-	t14          = t01       ^ t05;
-	out[4*1]   = in[4*1]   ^ t12;
-	t16          = in[4*1]   | t13;
-	out[4*3]   = t14       ^ t16; 
-}
-
-/* S4:   1 15  8  3 12  0 11  6  2  5  4 10  9 14  7 13 */
-
-/* depth = 6,7,5,3, Total gates=19 */
-/*
- * Bitsliced Serpent S-box S4.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb4(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t12, t13, t14, t15, t16, t01;
-	t01          = in[4*0]   | in[4*1];
-	t02          = in[4*1]   | in[4*2];
-	t03          = in[4*0]   ^ t02;
-	t04          = in[4*1]   ^ in[4*3];
-	t05          = in[4*3]   | t03;
-	t06          = in[4*3]   & t01;
-	out[4*3]   = t03       ^ t06;
-	t08          = out[4*3]   & t04;
-	t09          = t04       & t05;
-	t10          = in[4*2]   ^ t06;
-	t11          = in[4*1]   & in[4*2];
-	t12          = t04       ^ t08;
-	t13          = t11       | t03;
-	t14          = t10       ^ t09;
-	t15          = in[4*0]   & t05;
-	t16          = t11       | t12;
-	out[4*2]   = t13       ^ t08;
-	out[4*1]   = t15       ^ t16;
-	out[4*0]   =     ~ t14; 
-}
-
-/* InvS4:   5  0  8  3 10  9  7 14  2 12 11  6  4 15 13  1 */
-
-/* depth = 6,4,7,3, Total gates=17 */
-/*
- * Bitsliced inverse Serpent S-box InvS4.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb4_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t09, t10, t11, t12, t13, t15, t01;
-	t01          = in[4*1]   | in[4*3];
-	t02          = in[4*2]   | in[4*3];
-	t03          = in[4*0]   & t01;
-	t04          = in[4*1]   ^ t02;
-	t05          = in[4*2]   ^ in[4*3];
-	t06          =     ~ t03;
-	t07          = in[4*0]   & t04;
-	out[4*1]   = t05       ^ t07;
-	t09          = out[4*1]   | t06;
-	t10          = in[4*0]   ^ t07;
-	t11          = t01       ^ t09;
-	t12          = in[4*3]   ^ t04;
-	t13          = in[4*2]   | t10;
-	out[4*3]   = t03       ^ t12;
-	t15          = in[4*0]   ^ t04;
-	out[4*2]   = t11       ^ t13;
-	out[4*0]   = t15       ^ t09; 
-}
-
-/* S5:  15  5  2 11  4 10  9 12  0  3 14  8 13  6  7  1 */
-
-/* depth = 4,6,8,6, Total gates=17 */
-/*
- * Bitsliced Serpent S-box S5.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb5(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t14, t01;
-	t01          = in[4*1]   ^ in[4*3];
-	t02          = in[4*1]   | in[4*3];
-	t03          = in[4*0]   & t01;
-	t04          = in[4*2]   ^ t02;
-	t05          = t03       ^ t04;
-	out[4*0]   =     ~ t05;
-	t07          = in[4*0]   ^ t01;
-	t08          = in[4*3]   | out[4*0];
-	t09          = in[4*1]   | t05;
-	t10          = in[4*3]   ^ t08;
-	t11          = in[4*1]   | t07;
-	t12          = t03       | out[4*0];
-	t13          = t07       | t10;
-	t14          = t01       ^ t11;
-	out[4*2]   = t09       ^ t13;
-	out[4*1]   = t07       ^ t08;
-	out[4*3]   = t12       ^ t14; 
-}
-
-/* InvS5:   8 15  2  9  4  1 13 14 11  6  5  3  7 12 10  0 */
-
-/* depth = 4,6,9,7, Total gates=17 */
-/*
- * Bitsliced inverse Serpent S-box InvS5.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb5_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t12, t13, t15, t16, t01;
-	t01          = in[4*0]   & in[4*3];
-	t02          = in[4*2]   ^ t01;
-	t03          = in[4*0]   ^ in[4*3];
-	t04          = in[4*1]   & t02;
-	t05          = in[4*0]   & in[4*2];
-	out[4*0]   = t03       ^ t04;
-	t07          = in[4*0]   & out[4*0];
-	t08          = t01       ^ out[4*0];
-	t09          = in[4*1]   | t05;
-	t10          =     ~ in[4*1];
-	out[4*1]   = t08       ^ t09;
-	t12          = t10       | t07;
-	t13          = out[4*0]   | out[4*1];
-	out[4*3]   = t02       ^ t12;
-	t15          = t02       ^ t13;
-	t16          = in[4*1]   ^ in[4*3];
-	out[4*2]   = t16       ^ t15; 
-}
-
-/* S6:   7  2 12  5  8  4  6 11 14  9  1 15 13  3 10  0 */
-
-/* depth = 8,3,6,3, Total gates=19 */
-/*
- * Bitsliced Serpent S-box S6.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb6(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t15, t17, t18, t01;
-	t01          = in[4*0]   & in[4*3];
-	t02          = in[4*1]   ^ in[4*2];
-	t03          = in[4*0]   ^ in[4*3];
-	t04          = t01       ^ t02;
-	t05          = in[4*1]   | in[4*2];
-	out[4*1]   =     ~ t04;
-	t07          = t03       & t05;
-	t08          = in[4*1]   & out[4*1];
-	t09          = in[4*0]   | in[4*2];
-	t10          = t07       ^ t08;
-	t11          = in[4*1]   | in[4*3];
-	t12          = in[4*2]   ^ t11;
-	t13          = t09       ^ t10;
-	out[4*2]   =     ~ t13;
-	t15          = out[4*1]   & t03;
-	out[4*3]   = t12       ^ t07;
-	t17          = in[4*0]   ^ in[4*1];
-	t18          = out[4*2]   ^ t15;
-	out[4*0]   = t17       ^ t18; 
-}
-
-/* InvS6:  15 10  1 13  5  3  6  0  4  9 14  7  2 12  8 11 */
-
-/* depth = 5,3,8,6, Total gates=19 */
-/*
- * Bitsliced inverse Serpent S-box InvS6.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb6_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t12, t13, t14, t15, t16, t17, t01;
-	t01          = in[4*0]   ^ in[4*2];
-	t02          =     ~ in[4*2];
-	t03          = in[4*1]   & t01;
-	t04          = in[4*1]   | t02;
-	t05          = in[4*3]   | t03;
-	t06          = in[4*1]   ^ in[4*3];
-	t07          = in[4*0]   & t04;
-	t08          = in[4*0]   | t02;
-	t09          = t07       ^ t05;
-	out[4*1]   = t06       ^ t08;
-	out[4*0]   =     ~ t09;
-	t12          = in[4*1]   & out[4*0];
-	t13          = t01       & t05;
-	t14          = t01       ^ t12;
-	t15          = t07       ^ t13;
-	t16          = in[4*3]   | t02;
-	t17          = in[4*0]   ^ out[4*1];
-	out[4*3]   = t17       ^ t15;
-	out[4*2]   = t16       ^ t14; 
-}
-
-/* S7:   1 13 15  0 14  8  2 11  7  4 12 10  9  3  5  6 */
-
-/* depth = 10,7,10,4, Total gates=19 */
-/*
- * Bitsliced Serpent S-box S7.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb7(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t13, t14, t15, t16, t17, t01;
-	t01          = in[4*0]   & in[4*2];
-	t02          =     ~ in[4*3];
-	t03          = in[4*0]   & t02;
-	t04          = in[4*1]   | t01;
-	t05          = in[4*0]   & in[4*1];
-	t06          = in[4*2]   ^ t04;
-	out[4*3]   = t03       ^ t06;
-	t08          = in[4*2]   | out[4*3];
-	t09          = in[4*3]   | t05;
-	t10          = in[4*0]   ^ t08;
-	t11          = t04       & out[4*3];
-	out[4*1]   = t09       ^ t10;
-	t13          = in[4*1]   ^ out[4*1];
-	t14          = t01       ^ out[4*1];
-	t15          = in[4*2]   ^ t05;
-	t16          = t11       | t13;
-	t17          = t02       | t14;
-	out[4*0]   = t15       ^ t17;
-	out[4*2]   = in[4*0]   ^ t16; 
-}
-
-/* InvS7:   3  0  6 13  9 14 15  8  5 12 11  7 10  1  4  2 */
-
-/* depth = 9,7,3,3, Total gates=18 */
-/*
- * Bitsliced inverse Serpent S-box InvS7.
- * Reads the planes in[0], in[4], in[8], in[12] and writes the planes
- * out[0], out[4], out[8], out[12] with bitwise byte operations only.
- * in[] is still read after out[] has been written and out[] is read back,
- * so out must not alias in.
- */
-static void sb7_inv(uint8_t* out, const uint8_t* in){
-	uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t13, t14, t15, t16, t01;
-	t01          = in[4*0]   & in[4*1];
-	t02          = in[4*0]   | in[4*1];
-	t03          = in[4*2]   | t01;
-	t04          = in[4*3]   & t02;
-	out[4*3]   = t03       ^ t04;
-	t06          = in[4*1]   ^ t04;
-	t07          = in[4*3]   ^ out[4*3];
-	t08          =     ~ t07;
-	t09          = t06       | t08;
-	t10          = in[4*1]   ^ in[4*3];
-	t11          = in[4*0]   | in[4*3];
-	out[4*1]   = in[4*0]   ^ t09;
-	t13          = in[4*2]   ^ t06;
-	t14          = in[4*2]   & t11;
-	t15          = in[4*3]   | out[4*1];
-	t16          = t01       | t10;
-	out[4*0]   = t13       ^ t15;
-	out[4*2]   = t14       ^ t16; 
-}
-
-/* Common signature of the bitsliced S-box routines: (out, in). */
-typedef void(*sb_fpt)(uint8_t*, const uint8_t*);
-
-/*
- * Forward S-boxes S0..S7, indexed by S-box number and kept in program
- * memory; entries must be fetched with pgm_read_word().
- * The table is const because objects placed in flash via PROGMEM are
- * read-only, and current avr-gcc rejects a non-const PROGMEM definition.
- */
-const sb_fpt sf_tab[] PROGMEM = {
-	sb0, sb1, sb2, sb3, 
-	sb4, sb5, sb6, sb7
-};
-
-/* Inverse S-boxes InvS0..InvS7, same layout and access rules as sf_tab. */
-const sb_fpt sinvf_tab[] PROGMEM = {
-	sb0_inv, sb1_inv, sb2_inv, sb3_inv, 
-	sb4_inv, sb5_inv, sb6_inv, sb7_inv
-};
-
-/*
- * Applies forward S-box (box & 7) to the 16-byte block at w, in place.
- * The routine fetched from sf_tab is run once per byte column 0..3; each
- * call fills bytes column, column+4, column+8 and column+12 of a scratch
- * copy, which is written back to w only after all four columns are done.
- */
-void sbox128(void * w, uint8_t box){
-	uint8_t *state = (uint8_t*)w;
-	uint8_t scratch[16];
-	uint8_t column;
-	sb_fpt sbox_fn = (sb_fpt)pgm_read_word(&(sf_tab[box & 0x7]));
-
-	for(column=0; column<4; ++column){
-		sbox_fn(&scratch[column], &state[column]);
-	}
-	memcpy(state, scratch, 16);
-}
-
-/*
- * Applies inverse S-box (box & 7) to the 16-byte block at w, in place.
- * The routine fetched from sinvf_tab is run once per byte column 0..3; each
- * call fills bytes column, column+4, column+8 and column+12 of a scratch
- * copy, which is written back to w only after all four columns are done.
- */
-void inv_sbox128(void * w, uint8_t box){
-	uint8_t *state = (uint8_t*)w;
-	uint8_t scratch[16];
-	uint8_t column;
-	sb_fpt sbox_fn = (sb_fpt)pgm_read_word(&(sinvf_tab[box & 0x7]));
-
-	for(column=0; column<4; ++column){
-		sbox_fn(&scratch[column], &state[column]);
-	}
-	memcpy(state, scratch, 16);
-}
-
-
-
-
-
-
-
-
diff --git a/serpent-sboxes-fast.S b/serpent-sboxes-fast.S
deleted file mode 100644
index 9242272..0000000
--- a/serpent-sboxes-fast.S
+++ /dev/null
@@ -1,233 +0,0 @@
-/* serpent-sboxes-fast.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/* 
- * File:        serpent-sboxes-fast.S
- * Author:      Daniel Otte
- * Date:        2008-08-07
- * License:     GPLv3 or later
- * Description: Implementation of the serpent sbox function.
- * 
- */
- 
-#include <avr/io.h>
-#include "avr-asm-macros.S"
-
-
-/*
- * Forward S-boxes S0..S7 in program memory: eight rows of 16 bytes, row b
- * being S-box b. Each byte holds the 4-bit result in both of its nibbles,
- * so sbox128x_fast can mask out either half without shifting.
- */
-serpent_sbox_fast:
- .byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB 
- .byte 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC
- .byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA
- .byte 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44 
- .byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF 
- .byte 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22
- .byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33 
- .byte 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE
- .byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66 
- .byte 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD
- .byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC 
- .byte 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11
- .byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB 
- .byte 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00
- .byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB 
- .byte 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66
-
-/*
- * Inverse S-boxes InvS0..InvS7, same layout as serpent_sbox_fast.
- * This table must stay directly behind serpent_sbox_fast: inv_sbox128 sets
- * bit 3 of the box number and sbox128x_fast then indexes these rows as
- * rows 8..15 relative to serpent_sbox_fast.
- */
-serpent_sbox_inv_fast:
- .byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC 
- .byte 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22
- .byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33 
- .byte 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00
- .byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22 
- .byte 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77
- .byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD 
- .byte 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11
- .byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE 
- .byte 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11
- .byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE 
- .byte 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00
- .byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00 
- .byte 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB
- .byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88 
- .byte 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22
-                  
- 
-/*
- * void serpent_ip(uint32_t *i, uint8_t *o)
- * Regroups the 16 input bytes at i into 16 output bytes at o: output byte n
- * collects, low bit first, bit 2n of the four 32-bit words followed by
- * bit 2n+1 of the four words, so each output nibble holds one bit from
- * every word.
- */
-/*
- * param i is given in r24:r25
- * param o is given in r22:r23
- */
-.global serpent_ip
-serpent_ip:
-	push_range 2, 17	
-	movw r26, r24
-	ldi r24, 16
-	/* Z = 0x0002, used as the data-space address of r2 (memory-mapped register file) */
-	clr r31
-	ldi r30, 2
-1:
-	ld r25, X+
-	st Z+, r25
-	dec r24
-	brne 1b
-	/* now the whole input is loaded in r2-r17 (16 bytes) */
-	movw r26, r22
-	/* outer loop: one pass per byte lane of the four words */
-	ldi r21, 4
-4:
-	ldi r20, 8	
-2:	
-	/* shift one bit out of the current byte of each word into r19, top down */
-	/* (the operand written as plain 19 names the same register, r19) */
-	lsr r2
-	ror r19
-	lsr r6
-	ror 19
-	lsr r10
-	ror r19
-	lsr r14
-	ror 19
-	/* r20 is odd on every second pass: r19 then holds 8 fresh bits, store it */
-	sbrc r20, 0
-	st X+, r19
-	dec r20
-	brne 2b
-	
-	/* move r3..r17 down into r2..r16 so the next byte of every word sits in r2/r6/r10/r14 */
-	ldi r20, 15
-	ldi r30, 2
-3:
-	ldd r19, Z+1
-	st Z+, r19
-	dec r20
-	brne 3b
-	
-	dec r21
-	brne 4b
-	pop_range 2, 17 
- 	ret
-
-/*
- * void serpent_fp(uint32_t *i, uint8_t *o)
- * Inverse regrouping of serpent_ip: every nibble read from i contributes one
- * bit to each of the four 32-bit output words at o.
- */
-/*
- * param i is given in r24:r25
- * param o is given in r22:r23
- */
-.global serpent_fp
-serpent_fp:
-	movw r26, r24
-	movw r30, r22
-	/* outer loop: one output byte per word and pass */
-	ldi r18, 4
-1:	
-	ldi r19, 8 
-2:
-	/* r19 is even on every second pass: fetch the next input byte (two nibbles) */
-	sbrs r19, 0
-	ld r24, X+
-3:
-	/* distribute the low four bits of r24 to r20..r23, one bit each */
-	lsr r24
-	ror r20
-	lsr r24
-	ror r21
-	lsr r24
-	ror r22
-	lsr r24
-	ror r23
-	dec r19
-	brne 2b
-	
-	/* Z was post-incremented by the first store, so Z+3/Z+7/Z+11 address the same byte lane of words 1..3 */
-	st Z+, r20
-	std Z+3, r21
-	std Z+7, r22
-	std Z+11, r23
-	
-	dec r18
-	brne 1b 	
-	ret
-	
-
-/*
- * void inv_sbox128(void * w, uint8_t box)
- * Selects the inverse S-box by forcing the box number into 8..15
- * ((box & 7) | 8) and continues in sbox128x_fast.
- * param w   is passed in r24:r25
- * param box is passed in r22
- */
-.global inv_sbox128 
-inv_sbox128:
- 	andi r22, 0x07
- 	ori  r22, 0x08
- 	rjmp sbox128x_fast
- 
-/*
- * void sbox128(void * w, uint8_t box);
- * Applies S-box (box & 7) to the 16-byte block at w, in place: serpent_ip
- * regroups w into a 16-byte stack buffer, both nibbles of every buffer byte
- * are substituted through the selected table row, and serpent_fp writes the
- * regrouped result back to w.
- */
-/*
- * param w   is passed in r24:r25
- * param box is passed in r22
- */
-.global sbox128 
-sbox128:
- 	andi r22, 0x07
- 
-/* common body; entered by fall-through from sbox128 and by rjmp from inv_sbox128 (r22 = table row 0..15) */
-sbox128x_fast:
-	/* NOTE(review): stack_alloc/stack_free/push_/pop_ are defined outside this excerpt; */
-	/* stack_alloc is assumed to leave Z at the stack pointer, hence the adiw to reach the buffer start */
-	stack_alloc 16
-	adiw r30, 1
-	push_ r24, r25, r22, r30, r31
-	movw r22, r30	/* Z points to the stack buffer */
-	rcall serpent_ip
-	/* restore the buffer address into X and the row number into r22; w stays on the stack */
-	pop_ r27, r26, r22
-	ldi r25, hi8(serpent_sbox_fast)
-	ldi r24, lo8(serpent_sbox_fast)
-	swap r22 /* r22 *= 16 */
-	add r24, r22
-	adc r25, r1
-	/* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
-	ldi r22, 16
-1:	
-	/* look up the low nibble of the buffer byte -> r19 */
-	movw r30, r24
-	ld r18, X
-	mov r20, r18
-	andi r18, 0x0f
-	add r30, r18
-	adc r31, r1
-	lpm r19, Z
-2:	
-	/* look up the high nibble of the buffer byte -> r21 */
-	swap r20
-	andi r20, 0x0f
-	movw r30, r24
-	add r30, r20
-	adc r31, r1
-	lpm r21, Z
-3:	
-	/* table bytes carry the result in both nibbles: keep one half of each and merge */
-	andi r19, 0x0F
-	andi r21, 0xF0
-	or r19, r21
-	st X+, r19
-	dec r22
-	brne 1b
-	
-	/* r22:r23 = w (saved above), r24:r25 = start of the stack buffer */
-	pop_ r23, r22
-	movw r24, r26
-	sbiw r24, 16
-
-	rcall serpent_fp
-	
-	stack_free 16	
-	ret
-
-
-
-
-
-
- 
-
-
diff --git a/serpent-sboxes-small.S b/serpent-sboxes-small.S
deleted file mode 100644
index 028fd77..0000000
--- a/serpent-sboxes-small.S
+++ /dev/null
@@ -1,231 +0,0 @@
-/* serpent-sboxes-small.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/* 
- * File:        serpent-sboxes-small.S
- * Author:      Daniel Otte
- * Date:        2008-08-07
- * License:     GPLv3 or later
- * Description: Implementation of the serpent sbox function.
- * 
- */
- 
-#include <avr/io.h>
-#include "avr-asm-macros.S"
-
-
-/*
- * Forward S-boxes S0..S7 in program memory, packed two entries per byte:
- * eight rows of 8 bytes, row b being S-box b. Byte k of a row holds entry 2k
- * in its low nibble and entry 2k+1 in its high nibble.
- */
-serpent_sbox:
- .byte 0x83, 0x1F, 0x6A, 0xB5, 0xDE, 0x24, 0x07, 0xC9
- .byte 0xCF, 0x72, 0x09, 0xA5, 0xB1, 0x8E, 0xD6, 0x43 
- .byte 0x68, 0x97, 0xC3, 0xFA, 0x1D, 0x4E, 0xB0, 0x25
- .byte 0xF0, 0x8B, 0x9C, 0x36, 0x1D, 0x42, 0x7A, 0xE5
- .byte 0xF1, 0x38, 0x0C, 0x6B, 0x52, 0xA4, 0xE9, 0xD7
- .byte 0x5F, 0xB2, 0xA4, 0xC9, 0x30, 0x8E, 0x6D, 0x17
- .byte 0x27, 0x5C, 0x48, 0xB6, 0x9E, 0xF1, 0x3D, 0x0A
- .byte 0xD1, 0x0F, 0x8E, 0xB2, 0x47, 0xAC, 0x39, 0x65
-
-/*
- * Inverse S-boxes InvS0..InvS7, packed like serpent_sbox.
- * This table must stay directly behind serpent_sbox: inv_sbox128 sets bit 3
- * of the box number and sbox128x then indexes these rows as rows 8..15
- * relative to serpent_sbox.
- */
-serpent_sbox_inv:
- .byte 0x3D, 0x0B, 0x6A, 0xC5, 0xE1, 0x74, 0x9F, 0x28
- .byte 0x85, 0xE2, 0x6F, 0x3C, 0x4B, 0x97, 0xD1, 0x0A
- .byte 0x9C, 0x4F, 0xEB, 0x21, 0x30, 0xD6, 0x85, 0x7A
- .byte 0x90, 0x7A, 0xEB, 0xD6, 0x53, 0x2C, 0x84, 0x1F
- .byte 0x05, 0x38, 0x9A, 0xE7, 0xC2, 0x6B, 0xF4, 0x1D
- .byte 0xF8, 0x92, 0x14, 0xED, 0x6B, 0x35, 0xC7, 0x0A
- .byte 0xAF, 0xD1, 0x35, 0x06, 0x94, 0x7E, 0xC2, 0xB8
- .byte 0x03, 0xD6, 0xE9, 0x8F, 0xC5, 0x7B, 0x1A, 0x24                  
- 
-/*
- * void serpent_ip(uint32_t *i, uint8_t *o)
- * Regroups the 16 input bytes at i into 16 output bytes at o: output byte n
- * collects, low bit first, bit 2n of the four 32-bit words followed by
- * bit 2n+1 of the four words, so each output nibble holds one bit from
- * every word.
- */
-/*
- * param i is given in r24:r25
- * param o is given in r22:r23
- */
-.global serpent_ip
-serpent_ip:
-	push_range 2, 17	
-	movw r26, r24
-	ldi r24, 16
-	/* Z = 0x0002, used as the data-space address of r2 (memory-mapped register file) */
-	clr r31
-	ldi r30, 2
-1:
-	ld r25, X+
-	st Z+, r25
-	dec r24
-	brne 1b
-	/* now the whole input is loaded in r2-r17 (16 bytes) */
-	movw r26, r22
-	/* outer loop: one pass per byte lane of the four words */
-	ldi r21, 4
-4:
-	ldi r20, 8	
-2:	
-	/* shift one bit out of the current byte of each word into r19, top down */
-	/* (the operand written as plain 19 names the same register, r19) */
-	lsr r2
-	ror r19
-	lsr r6
-	ror 19
-	lsr r10
-	ror r19
-	lsr r14
-	ror 19
-	/* r20 is odd on every second pass: r19 then holds 8 fresh bits, store it */
-	sbrc r20, 0
-	st X+, r19
-	dec r20
-	brne 2b
-	
-	/* move r3..r17 down into r2..r16 so the next byte of every word sits in r2/r6/r10/r14 */
-	ldi r20, 15
-	ldi r30, 2
-3:
-	ldd r19, Z+1
-	st Z+, r19
-	dec r20
-	brne 3b
-	
-	dec r21
-	brne 4b
-	pop_range 2, 17 
- 	ret
-
-/*
- * void serpent_fp(uint32_t *i, uint8_t *o)
- * Inverse regrouping of serpent_ip: every nibble read from i contributes one
- * bit to each of the four 32-bit output words at o.
- */
-/*
- * param i is given in r24:r25
- * param o is given in r22:r23
- */
-.global serpent_fp
-serpent_fp:
-	movw r26, r24
-	movw r30, r22
-	/* outer loop: one output byte per word and pass */
-	ldi r18, 4
-1:	
-	ldi r19, 8 
-2:
-	/* r19 is even on every second pass: fetch the next input byte (two nibbles) */
-	sbrs r19, 0
-	ld r24, X+
-3:
-	/* distribute the low four bits of r24 to r20..r23, one bit each */
-	lsr r24
-	ror r20
-	lsr r24
-	ror r21
-	lsr r24
-	ror r22
-	lsr r24
-	ror r23
-	dec r19
-	brne 2b
-	
-	/* Z was post-incremented by the first store, so Z+3/Z+7/Z+11 address the same byte lane of words 1..3 */
-	st Z+, r20
-	std Z+3, r21
-	std Z+7, r22
-	std Z+11, r23
-	
-	dec r18
-	brne 1b 
-	
-	ret
-/*
- * void inv_sbox128(void * w, uint8_t box)
- * Selects the inverse S-box by forcing the box number into 8..15
- * ((box & 7) | 8) and continues in sbox128x.
- * param w   is passed in r24:r25
- * param box is passed in r22
- */
-.global inv_sbox128 
-inv_sbox128:
- 	andi r22, 0x07
- 	ori  r22, 0x08
- 	rjmp sbox128x
- 
-/*
- * void sbox128(void * w, uint8_t box);
- * Applies forward S-box (box & 7) to the 16-byte block at w, in place.
- * Masks the box number and falls through into sbox128x.
- */
-/*
- * param w   is passed in r24:r25
- * param box is passed in r22
- */
-.global sbox128 
-sbox128:
- 	andi r22, 0x07
-
-/*
- * void sbox128x(void * w, uint8_t box);
- * Common body: box selects one of 16 table rows (0..7 forward, 8..15
- * inverse). serpent_ip regroups w into a 16-byte stack buffer, both nibbles
- * of every buffer byte are substituted through the packed table row, and
- * serpent_fp writes the regrouped result back to w.
- */
-/*
- * param w   is passed in r24:r25
- * param box is passed in r22
- */
- 
-.global sbox128x
-sbox128x:
-	/* NOTE(review): stack_alloc/stack_free/push_/pop_ are defined outside this excerpt; */
-	/* stack_alloc is assumed to leave Z at the stack pointer, hence the adiw to reach the buffer start */
-	stack_alloc 16
-	adiw r30, 1
-	push_ r24, r25, r22, r30, r31
-	movw r22, r30	/* Z points to the stack buffer */
-	rcall serpent_ip
-	/* restore the buffer address into X and the row number into r22; w stays on the stack */
-	pop_ r27, r26, r22
-	ldi r25, hi8(serpent_sbox)
-	ldi r24, lo8(serpent_sbox)
-	swap r22
-	lsr  r22 /* r22 *= 8 */
-	add r24, r22
-	adc r25, r1
-	/* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
-	ldi r22, 16
-1:	
-	/* low nibble: T = index bit 0, index/2 selects the packed table byte */
-	movw r30, r24
-	ld r18, X
-	mov r20, r18
-	andi r18, 0x0f
-	bst r18, 0
-	lsr r18
-	add r30, r18
-	adc r31, r1
-	lpm r19, Z
-	/* odd index: the entry is in the high nibble, bring it down */
-	brtc 2f
-	swap r19
-2:	
-	/* high nibble: same lookup, but the result is wanted in the upper half */
-	swap r20
-	andi r20, 0x0f
-	bst r20, 0
-	lsr r20
-	movw r30, r24
-	add r30, r20
-	adc r31, r1
-	lpm r21, Z
-	/* even index: the entry is in the low nibble, bring it up */
-	brts 3f
-	swap r21
-3:	
-	andi r19, 0x0F
-	andi r21, 0xF0
-	or r19, r21
-	st X+, r19
-	dec r22
-	brne 1b
-	
-	/* r22:r23 = w (saved above), r24:r25 = start of the stack buffer */
-	pop_ r23, r22
-	movw r24, r26
-	sbiw r24, 16
-
-	rcall serpent_fp
-	
-	stack_free 16	
-	ret
- 
-
-
-
- 
-
-
diff --git a/serpent-sboxes.h b/serpent-sboxes.h
deleted file mode 100644
index c391edc..0000000
--- a/serpent-sboxes.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* serpent-sboxes.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef SERPENTSBOXES_H_
-#define SERPENTSBOXES_H_
-
-#include <stdint.h>
-
-/**
- * Applies the Serpent S-box selected by (box & 7) to the 16-byte block at w,
- * overwriting the block with the result.
- */
-void sbox128(void * w, uint8_t box);
-/**
- * Applies the inverse Serpent S-box selected by (box & 7) to the 16-byte
- * block at w, overwriting the block with the result.
- */
-void inv_sbox128(void * w, uint8_t box);
-
-
-#endif /*SERPENTSBOXES_H_*/
diff --git a/serpent-sboxes_c.c b/serpent-sboxes_c.c
deleted file mode 100644
index 3a59969..0000000
--- a/serpent-sboxes_c.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* serpent-sboxes_c.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* serpent-sboxes_c.c
- * a non-bitsliced implementation of the serpent sboxes
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- */
-
-#include <stdint.h>
-#include <string.h> /* memset() */
-#include <avr/pgmspace.h>
-#include "serpent-sboxes.h"
-
-/*
- * Packed Serpent S-boxes in program memory: 16 rows of 8 bytes. Rows 0..7
- * are S0..S7, rows 8..15 the inverse boxes. Each byte holds two 4-bit
- * entries, the even-indexed one in the high nibble. Read with
- * pgm_read_byte().
- * The table is const because objects placed in flash via PROGMEM are
- * read-only, and current avr-gcc rejects a non-const PROGMEM definition.
- */
-const uint8_t sbox[] PROGMEM = {
- 0x38, 0xF1, 0xA6, 0x5B, 0xED, 0x42, 0x70, 0x9C,
- 0xFC, 0x27, 0x90, 0x5A, 0x1B, 0xE8, 0x6D, 0x34, 
- 0x86, 0x79, 0x3C, 0xAF, 0xD1, 0xE4, 0x0B, 0x52,
- 0x0F, 0xB8, 0xC9, 0x63, 0xD1, 0x24, 0xA7, 0x5E,
- 0x1F, 0x83, 0xC0, 0xB6, 0x25, 0x4A, 0x9E, 0x7D,
- 0xF5, 0x2B, 0x4A, 0x9C, 0x03, 0xE8, 0xD6, 0x71,
- 0x72, 0xC5, 0x84, 0x6B, 0xE9, 0x1F, 0xD3, 0xA0,
- 0x1D, 0xF0, 0xE8, 0x2B, 0x74, 0xCA, 0x93, 0x56,
-/* now the inverted sboxes */
- 0xD3, 0xB0, 0xA6, 0x5C, 0x1E, 0x47, 0xF9, 0x82,
- 0x58, 0x2E, 0xF6, 0xC3, 0xB4, 0x79, 0x1D, 0xA0,
- 0xC9, 0xF4, 0xBE, 0x12, 0x03, 0x6D, 0x58, 0xA7,
- 0x09, 0xA7, 0xBE, 0x6D, 0x35, 0xC2, 0x48, 0xF1,
- 0x50, 0x83, 0xA9, 0x7E, 0x2C, 0xB6, 0x4F, 0xD1,
- 0x8F, 0x29, 0x41, 0xDE, 0xB6, 0x53, 0x7C, 0xA0,
- 0xFA, 0x1D, 0x53, 0x60, 0x49, 0xE7, 0x2C, 0x8B,
- 0x30, 0x6D, 0x9E, 0xF8, 0x5C, 0xB7, 0xA1, 0x42
-};        
-         
-
-/* SHR_O: shift a right by one, saving the bit that falls out in the local `c`. */
-#define SHR_O(a) c=(a)&1; ((a) = (a)>>1)
-/* SHR_I: shift the byte a right by one, feeding `c` in at the top. */
-#define SHR_I(a) ((a) = (c?0x80:0x00)| ((a)>>1))
-
-/*
- * Regroups the four 32-bit words at i into the 16 bytes at o.
- * Output byte n receives, low bit first, bit 2n of i[0..3] followed by
- * bit 2n+1 of i[0..3]. The words at i are consumed: all four end up zero.
- */
-static void serpent_ip(uint32_t *i, uint8_t *o){
-	uint8_t c; // carry 
-	uint8_t byte_idx, step;
-	memset(o, 0, 16);
-	for(byte_idx=0; byte_idx<16; ++byte_idx){
-		/* eight steps per output byte, cycling through the words 0,1,2,3,0,1,2,3 */
-		for(step=0; step<8; ++step){
-			SHR_O(i[step&3]);
-			SHR_I(o[byte_idx]);
-		}
-	}
-}
-
-/* From here on SHR_I feeds the carry into bit 31 of a 32-bit word. */
-#undef SHR_I
-#define SHR_I(a) ((a) = (c?0x80000000L:0x00L)| ((a)>>1)) /* we use 32-bit words here */
-
-/*
- * Spreads the bits of the four words at i over the four words at o.
- * Bits leave i[word] low bit first and are dealt out to o[0], o[1], o[2],
- * o[3] in rotation. The words at i are consumed: all four end up zero.
- */
-static void serpent_fp(uint32_t *i, uint32_t *o){
-	uint8_t c; // carry 
-	uint8_t word, bit;
-	memset(o, 0, 16);
-	for(word=0; word<4; ++word){
-		/* 32 bits per input word, dealt round-robin to the four outputs */
-		for(bit=0; bit<32; ++bit){
-			SHR_O(i[word]);
-			SHR_I(o[bit&3]);
-		}
-	}
-}
-
-/******************************************************************************/
-/*
- * Applies table row `box` (0..7 forward, 8..15 inverse) to the 16-byte block
- * at w, in place: the packed row is unpacked into sb[], serpent_ip regroups w
- * into a scratch buffer, both nibbles of each scratch byte are substituted,
- * and serpent_fp writes the regrouped result back to w.
- * The scratch buffer is declared as four 32-bit words so that handing it to
- * serpent_fp as uint32_t* is well-defined; it is accessed bytewise through
- * `bytes` everywhere else.
- */
-static void sbox128x(uint8_t box, void* w){
-	uint8_t sb[16];
-	uint8_t i,t,x;
-	uint32_t o[4];
-	uint8_t *bytes = (uint8_t*)o;
-	box &= 0x0f;
-	/* load sbox */
-	for(i=0; i<8; ++i){
-		t = pgm_read_byte(sbox + box*8 + i);
-		sb[2*i+0]=t>>4;
-		sb[2*i+1]=t&0xf;
-	}
-	serpent_ip(w,bytes);
-	
-	for(i=0; i<16; ++i){
-		t = bytes[i];
-		x = sb[t>>4];
-		x <<= 4;
-		x |= sb[t&0xf];
-		bytes[i] = x;
-	}
-	serpent_fp(o, w);
-}
-
-/* Applies forward S-box (box & 7) to the 16-byte block at w, in place. */
-void sbox128(void * w, uint8_t box){
-	const uint8_t row = box & 0x7;
-	sbox128x(row, w);
-}
-
-
-/* Applies inverse S-box (box & 7) to the 16-byte block at w, in place (table rows 8..15). */
-void inv_sbox128(void * w, uint8_t box){
-	const uint8_t row = (box & 0x7) | 0x8;
-	sbox128x(row, w);
-}
-
-
-
diff --git a/serpent.c b/serpent.c
deleted file mode 100644
index 40ccecd..0000000
--- a/serpent.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/* serpent.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* serpent.c
- * a bitsliced implementation of the serpent cipher for avr microcontrollers
- * author: Daniel Otte 
- * email:  daniel.otte@rub.de
- * license: GPLv3
- */
-
-#include <stdint.h>
-#include <string.h> /* memset() */
-#include <avr/pgmspace.h>
-#include "memxor.h"
-#include "serpent.h"
-#include "serpent-sboxes.h"
-
-/******************************************************************************/
-
-uint32_t rotl32(uint32_t a, uint8_t n){
-	return ((a<<n) | (a>>(32-n)));
-}
-
-
-uint32_t rotr32(uint32_t a, uint8_t n){
-	return ((a>>n) | (a<<(32-n)));
-}
-
-
-#define X0 (((uint32_t*)b)[0])
-#define X1 (((uint32_t*)b)[1])
-#define X2 (((uint32_t*)b)[2])
-#define X3 (((uint32_t*)b)[3])
-
-static void serpent_lt(uint8_t *b){
-	X0 = rotl32(X0, 13);
-	X2 = rotl32(X2,  3);
-	X1 ^= X0 ^ X2;
-	X3 ^= X2 ^ (X0 << 3);
-	X1 = rotl32(X1, 1);
-	X3 = rotl32(X3, 7);
-	X0 ^= X1 ^ X3;
-	X2 ^= X3 ^ (X1 << 7);
-	X0 = rotl32(X0, 5);
-	X2 = rotr32(X2, 10);
-}
-
-static void serpent_inv_lt(uint8_t *b){
-	X2 = rotl32(X2, 10);
-	X0 = rotr32(X0, 5);
-	X2 ^= X3 ^ (X1 << 7);
-	X0 ^= X1 ^ X3;
-	X3 = rotr32(X3, 7);
-	X1 = rotr32(X1, 1);
-	X3 ^= X2 ^ (X0 << 3);
-	X1 ^= X0 ^ X2;
-	X2 = rotr32(X2,  3);
-	X0 = rotr32(X0, 13);
-}
-
-#define GOLDEN_RATIO 0x9e3779b9l
-
-static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
-	uint32_t ret;
-	ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
-	ret = rotl32(ret, 11);
-	return ret;
-} 
-
-void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx){
-	uint32_t buffer[8];
-	uint8_t i,j;
-	if(keysize_b<256){
-		/* keysize is less than 256 bit, padding needed */
-		memset(buffer, 0, 32);
-		memcpy(buffer, key, (keysize_b+7)/8);
-		((uint8_t*)buffer)[keysize_b/8] |= 1<<(keysize_b%8);
-	} else {
-		/* keysize is 256 bit */
-		memcpy(buffer, key, 32); 
-	}
-	for(i=0; i<33; ++i){
-		for(j=0; j<4; ++j){
-			ctx->k[i][j] = serpent_gen_w(buffer, i*4+j);
-			memmove(buffer, &(buffer[1]), 7*4); /* shift buffer one to the "left" */
-			buffer[7] = ctx->k[i][j];
-		}
-	}
-	for(i=0; i<33; ++i){
-		sbox128(ctx->k[i],3-i);
-	}
-}
-
-void serpent_enc(void* buffer, const serpent_ctx_t* ctx){
-	uint8_t i;
-	for(i=0; i<31; ++i){
-		memxor(buffer, ctx->k[i], 16);
-		sbox128(buffer, i);
-		serpent_lt((uint8_t*)buffer);
-	}
-	memxor(buffer, ctx->k[i], 16);
-	sbox128(buffer, i);
-	++i;
-	memxor(buffer, ctx->k[i], 16);
-}
-
-void serpent_dec(void* buffer, const serpent_ctx_t* ctx){
-	int8_t i=32;
-	
-	memxor(buffer, ctx->k[i], 16);
-	--i;
-	inv_sbox128(buffer, i);
-	memxor((uint8_t*)buffer, ctx->k[i], 16);
-	--i;
-	for(; i>=0; --i){
-		serpent_inv_lt(buffer);
-		inv_sbox128(buffer, i);
-		memxor(buffer, ctx->k[i], 16);
-	}
-}
-
-
-
-
-
diff --git a/serpent.h b/serpent.h
deleted file mode 100644
index 84e4a87..0000000
--- a/serpent.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* serpent.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/** \file   serpent.h
- * \author  Daniel Otte
- * \license GPLv3
- * \brief a implementation of the serpent cipher for avr microcontrollers
- */
-
-#ifndef SERPENT_H_
-#define SERPENT_H_
-
-#include <stdint.h>
-
-typedef uint32_t serpent_subkey_t[4];
-
-typedef struct serpent_ctx_st {
-	serpent_subkey_t k[33];
-}  serpent_ctx_t;
-
-#define SERPENT_KEY128 128
-#define SERPENT_KEY192 192
-#define SERPENT_KEY256 256
-
-
-/* key must be 256bit (32 byte) large! */
-void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx);
-void serpent_enc(void* buffer, const serpent_ctx_t* ctx);
-void serpent_dec(void* buffer, const serpent_ctx_t* ctx);
-
-
-#endif /*SERPENT_H_*/
diff --git a/serpent/memxor.S b/serpent/memxor.S
new file mode 100644
index 0000000..a32058b
--- /dev/null
+++ b/serpent/memxor.S
@@ -0,0 +1,66 @@
+/* memxor.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File:        memxor.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: memxor, XORing one block into another
+ *
+ */
+
+/*
+ * void memxor(void* dest, const void* src, uint16_t n);  -- XORs n bytes from src into dest (dest[i] ^= src[i])
+ */
+ /*
+  * param dest is passed in r24:r25
+  * param src  is passed in r22:r23
+  * param n    is passed in r20:r21
+  */
+.global memxor
+memxor:
+	movw r30, r24	/* Z := dest */
+	movw r26, r22	/* X := src */
+	movw r24, r20	/* r24:r25 := n, the number of bytes still to process */
+	adiw r24, 0	/* adding 0 only updates the flags: zero flag set iff n == 0 */
+	breq 2f	/* nothing to do for n == 0 */
+1:
+	ld r20, X+	/* next source byte */
+	ld r21, Z	/* current destination byte */
+	eor r20, r21
+	st Z+, r20	/* write the XOR back and advance dest */
+	sbiw r24, 1
+	brne 1b	/* repeat until the byte count reaches zero */
+2:
+	ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/serpent/memxor.h b/serpent/memxor.h
new file mode 100644
index 0000000..a62a616
--- /dev/null
+++ b/serpent/memxor.h
@@ -0,0 +1,7 @@
+#ifndef MEMXOR_H_
+#define MEMXOR_H_
+#include <stdint.h>
+
+void memxor(void* dest, const void* src, uint16_t n); /* XORs n bytes from src into dest (dest[i] ^= src[i]) */
+
+#endif
diff --git a/serpent/serpent-asm.S b/serpent/serpent-asm.S
new file mode 100644
index 0000000..f5f7cc5
--- /dev/null
+++ b/serpent/serpent-asm.S
@@ -0,0 +1,754 @@
+/* serpent-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* 
+ * File:        serpent-asm.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: Implementation of the serpent cipher: linear transformation, key schedule, encryption and decryption.
+ * 
+ */
+ 
+#include <avr/io.h>
+#include "avr-asm-macros.S"
+
+/*
+static void serpent_lt(uint8_t *b){
+	X0 = rotl32(X0, 13);
+	X2 = rotl32(X2,  3);
+	X1 ^= X0 ^ X2;
+	X3 ^= X2 ^ (X0 << 3);
+	X1 = rotl32(X1, 1);
+	X3 = rotl32(X3, 7);
+	X0 ^= X1 ^ X3;
+	X2 ^= X3 ^ (X1 << 7);
+	X0 = rotl32(X0, 5);
+	X2 = rotr32(X2, 10);
+}
+*/
+
+#if 0
+A0 =  4
+A1 =  5
+A2 =  6
+A3 =  7
+B0 =  8
+B1 =  9
+B2 = 10
+B3 = 11
+C0 = 12
+C1 = 13
+C2 = 14
+C3 = 15
+D0 = 16
+D1 = 17
+D2 = 18
+D3 = 19
+T0 = 20
+T1 = 21
+T2 = 22
+T3 = 23
+
+serpent_lt:
+	push_range 4, 17
+	movw r26, r24
+	ld A2, X+
+	ld A3, X+
+	ld A0, X+
+	ld A1, X+
+	ldi r20, 3
+	mov r0, A0
+1:	
+	lsr r0
+	ror A3
+	ror A2
+	ror A1
+	ror A0
+	dec r20
+	brne 1b
+	ld B0, X+
+	ld B1, X+
+	ld B2, X+
+	ld B3, X+
+	
+	ld C2, X+
+	ld C3, X+
+	ld C0, X+
+	ld C1, X+
+	ldi r20, 3
+	mov r0, C0
+1:	
+	lsr r0
+	ror C3
+	ror C2
+	ror C1
+	ror C0
+	dec r20
+	brne 1b
+
+	ld D0, X+
+	ld D1, X+
+	ld D2, X+
+	ld D3, X+
+	/* X1 ^= X0 ^ X2; */
+	eor B0, A0
+	eor B0, C0
+	eor B1, A1
+	eor B1, C1
+	eor B2, A2
+	eor B2, C2
+	eor B3, A3
+	eor B3, C3
+	/* X3 ^= X2 ^ (X0 << 3); */
+	mov T0, A0
+	mov T1, A1
+	mov T2, A2
+	mov T3, A3
+	ldi r24, 3
+1:
+	lsl T0
+	rol T1
+	rol T2
+	rol T3
+	dec r24
+	brne 1b
+	eor C0, B0
+	eor C0, T0
+	eor C1, B1
+	eor C1, T1
+	eor C2, B2
+	eor C2, T2
+	eor C3, B3
+	eor C3, T3
+	/*	X1 = rotl32(X1, 1); */
+	mov r0, B3
+	lsl r0
+	rol B0
+	rol B1
+	rol B2
+	rol B3
+	/* X3 = rotl32(X3, 7); */
+	mov r0, D3
+	mov D3, D2
+	mov D2, D1
+	mov D1, D0
+	mov D0, r0
+	lsr r0
+	ror D3
+	ror D2
+	ror D1
+	ror D0
+	/* 	X0 ^= X1 ^ X3; */
+	eor A0, B0
+	eor A0, D0
+	eor A1, B1
+	eor A1, D1
+	eor A2, B2
+	eor A2, D2
+	eor A3, B3
+	eor A3, D3
+	/*  X2 ^= X3 ^ (X1 << 7); */
+	mov T1, B0
+	mov T2, B1
+	mov T3, B2
+	clr T0
+	mov r0, B3
+	lsr r0
+	ror T2
+	ror T1
+	ror T0 
+	eor C0, D0
+	eor C0, T0
+	eor C1, D1
+	eor C1, T1
+	eor C2, D2
+	eor C2, T2
+	eor C3, D3
+	eor C3, T3
+	/* 	X0 = rotl32(X0, 5); */
+	ldi r24, 5
+	mov r0, A3
+1:	
+	lsl r0
+	rol A0
+	rol A1
+	rol A2
+	rol A3
+	dec r24
+	brne 1b
+	/* X2 = rotr32(X2, 10); */
+	mov r0, C0
+	mov C0, C1
+	mov C1, C2
+	mov C2, C3	
+	mov C3, r0
+	ldi r24, 2
+1:
+	lsr r0
+	ror C2
+	ror C1
+	ror C0
+	ror C3	
+	dec r24
+	brne 1b
+	
+	clr r31
+	ldi r30, D3+1
+	ldi r24, 16
+1:
+	ld r0, -Z
+	st -X, r0	
+	dec r24
+	brne 1b
+	
+	pop_range 4, 17
+	ret
+#endif
+
+T0 = 22
+T1 = 23
+T2 = 24
+T3 = 25
+TT = 21
+/* rotate the data word (4 byte) pointed to by X by r20 bits to the right; r20 must be 1..8 because TT supplies only 8 wrap-around bits; X is unchanged on return; clobbers r20, TT and T0..T3 */
+memrotr32:
+	ld T0, X+	/* T0 = first byte in memory = least significant byte of the word */
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	mov TT, T0	/* copy of the low byte: source of the bits that wrap around into the top */
+1:
+	lsr TT	/* carry := next low bit of the original word */
+	ror T3	/* shift the whole word right by one bit through carry */
+	ror T2
+	ror T1
+	ror T0
+	dec r20
+	brne 1b
+	st -X, T3	/* store back in reverse order, which also restores X */
+	st -X, T2
+	st -X, T1
+	st -X, T0
+	ret
+	
+/* rotate the data word (4 byte) pointed to by X by r20 bits to the left; r20 must be 1..8 because TT supplies only 8 wrap-around bits; X is unchanged on return; clobbers r20, TT and T0..T3 */
+memrotl32:
+	ld T0, X+	/* T0 = first byte in memory = least significant byte of the word */
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	mov TT, T3	/* copy of the high byte: source of the bits that wrap around into the bottom */
+1:
+	lsl TT	/* carry := next high bit of the original word */
+	rol T0	/* shift the whole word left by one bit through carry */
+	rol T1
+	rol T2
+	rol T3
+	dec r20
+	brne 1b
+	st -X, T3	/* store back in reverse order, which also restores X */
+	st -X, T2
+	st -X, T1
+	st -X, T0
+	ret
+
+/* xor the data word (4 byte) pointed to by Z into the word pointed to by X; X and Z are both advanced by 4 on return; clobbers T0, T1 and T2 */	
+memeor32:
+  ldi T2, 4	/* byte counter */
+1:  
+  ld T0, X		
+  ld T1, Z+
+  eor T0, T1
+  st X+, T0	/* write the result back and step to the next destination byte */
+  dec T2
+  brne 1b
+  ret
+
+serpent_lt:	/* in-place linear transformation of the four 32-bit words X0..X3 stored at r24:r25 */
+	 /* X0 := X0 <<< 13 */
+	movw r26, r24	/* X -> X0 */
+	ldi r20, 7	/* 13 is split into 7 + 6: memrotl32 handles at most 8 bits per call */
+	rcall memrotl32
+	ldi r20, 6
+	rcall memrotl32
+	/* X2 := X2 <<< 3 */
+	adiw r26, 8	/* X -> X2 */
+	ldi r20, 3
+	rcall memrotl32
+	/* X1 ^= X2 */
+	movw r30, r26	/* Z -> X2 */
+	sbiw r26, 4	/* X -> X1 */
+	rcall memeor32	/* returns with X -> X2, Z -> X3 */
+	/* X1 ^= X0 */
+	sbiw r26, 4	/* X -> X1 */
+	sbiw r30, 12	/* Z -> X0 */
+	rcall memeor32	/* returns with X -> X2, Z -> X1 */
+	/* X3 ^= X2 */
+	movw r30, r26	/* Z -> X2 */
+	adiw r26, 4	/* X -> X3 */
+	rcall memeor32	/* returns with X one word past X3 */
+	/* T := X0 */
+	sbiw r26, 16	/* X -> X0 */
+	ld r18, X+
+	ld r19, X+
+	ld r20, X+
+	ld r21, X+
+	/* T := T<<3 */
+	ldi r22, 3
+1:
+	lsl r18
+	rol r19
+	rol r20
+	rol r21
+	dec r22
+	brne 1b
+	clr r31 
+	/* X3 ^= T */
+	adiw r26, 8	/* X -> X3 */
+	ldi r30, 18	/* Z := 0x0012: relies on the register file being mapped into data space, so memeor32 reads T from r18..r21 */
+	rcall memeor32
+	/* X1 := X1<<<1 */
+	sbiw r26, 12	/* X -> X1 */
+	ldi r20, 1
+	rcall memrotl32
+	/* X3 := X3<<<7 */
+	adiw r26, 8	/* X -> X3 */
+	ldi r20, 7
+	rcall memrotl32
+	/* X0 ^= X3 */
+	movw r30, r26	/* Z -> X3 */
+	sbiw r26, 12	/* X -> X0 */
+	rcall memeor32	/* returns with X -> X1 */
+	/* X0 ^= X1 */
+	movw r30, r26	/* Z -> X1 */
+	sbiw r26, 4	/* X -> X0 */
+	rcall memeor32	/* returns with X -> X1, Z -> X2 */
+	/* X2 ^= X3 */
+	adiw r26, 4	/* X -> X2 */
+	adiw r30, 4	/* Z -> X3 */
+	rcall memeor32
+	/* T := X1<<<8 */
+	sbiw r26, 8	/* X -> X1 */
+	ld r19, X+
+	ld r20, X+
+	ld r21, X+
+	ld r18, X+
+	/* T := T>>>1; T&=0xffffff80  (together: T = X1 << 7) */
+	lsr r18
+	ror r21
+	ror r20
+	ror r19
+	clr r18	/* clr does not touch the carry flag */
+	ror r18	/* bit 7 := bit shifted out of r19; bits 0..6 stay zero */
+	clr r31
+	ldi r30, 18	/* Z -> T (r18..r21 seen through data space) */
+	/* X2 ^= T */
+	rcall memeor32
+	/* X0 := X0 <<< 5 */
+	sbiw r26, 12	/* X -> X0 */
+	ldi r20, 5
+	rcall memrotl32
+	/* X2 := X2 >>> 10 */
+	adiw r26, 8	/* X -> X2 */
+	ldi r20, 7	/* 10 is split into 7 + 3: memrotr32 handles at most 8 bits per call */
+	rcall memrotr32
+	ldi r20, 3
+	rcall memrotr32
+	ret
+
+serpent_inv_lt:	/* in-place inverse linear transformation of the four 32-bit words X0..X3 stored at r24:r25 */
+	 /* X0 := X0 >>> 5 */
+	movw r26, r24	/* X -> X0 */
+	ldi r20, 5
+	rcall memrotr32
+	/* X2 := X2 <<< 10 */
+	adiw r26, 8	/* X -> X2 */
+	ldi r20, 7	/* 10 is split into 7 + 3: memrotl32 handles at most 8 bits per call */
+	rcall memrotl32
+	ldi r20, 3
+	rcall memrotl32
+	/* X2 ^= X3 */
+	movw r30, r26
+	adiw r30, 4	/* Z -> X3 */
+	rcall memeor32	/* returns with X -> X3, Z one word past X3 */
+	sbiw r26, 4	/* X -> X2 */
+	sbiw r30, 12	/* Z -> X1 */
+	/* T := X1<<7 */
+	ld r19, Z+	/* loading one byte position higher gives X1 <<< 8 in r18..r21 */
+	ld r20, Z+
+	ld r21, Z+
+	ld r18, Z+
+	lsr r18	/* rotate right by one ... */
+	ror r21
+	ror r20
+	ror r19
+	clr r18	/* ... then drop the wrapped-around bits; clr does not touch the carry flag */
+	ror r18
+    clr r31
+    /* X2 ^= T */
+    ldi r30, 18	/* Z := 0x0012: relies on the register file being mapped into data space, so memeor32 reads T from r18..r21 */
+    rcall memeor32
+    /* X0 ^= X1 */
+    sbiw r26, 12	/* X -> X0 */
+    movw r30, r26
+    adiw r30, 4	/* Z -> X1 */
+    rcall memeor32	/* returns with X -> X1, Z -> X2 */
+    /* X0 ^= X3 */
+    sbiw r26, 4	/* X -> X0 */
+    adiw r30, 4	/* Z -> X3 */
+    rcall memeor32	/* returns with X -> X1, Z one word past X3 */
+    /* X1 := X1>>>1 */
+    ldi r20, 1
+	rcall memrotr32
+	/* X3 := X3>>>7 */
+	adiw r26, 8	/* X -> X3 */
+	ldi r20, 7
+	rcall memrotr32
+	/* X3 ^= X2 */
+	sbiw r30, 8	/* Z -> X2 */
+	rcall memeor32	/* returns with X one word past X3, Z -> X3 */
+	sbiw r26, 4	/* X -> X3 */
+	/* T:= X0<<3 */
+	sbiw r30, 12	/* Z -> X0 */
+	ld r18, Z+
+	ld r19, Z+
+	ld r20, Z+
+	ld r21, Z+
+	ldi r24, 3
+1:
+	lsl r18
+	rol r19
+	rol r20
+	rol r21
+	dec r24
+	brne 1b
+	/* X3 ^= T */
+	clr r31
+	ldi r30, 18	/* Z -> T (r18..r21 seen through data space) */
+	rcall memeor32
+	/* X1 ^= X0 */
+	sbiw r26, 12	/* X -> X1 */
+	movw r30, r26
+	sbiw r30, 4	/* Z -> X0 */
+	rcall memeor32	/* returns with X -> X2, Z -> X1 */
+	/* X1 ^= X2 */
+	movw r26, r30	/* X -> X1 */
+	adiw r30, 4	/* Z -> X2 */
+	rcall memeor32	/* returns with X -> X2 */
+	/* X2 := X2 >>> 3 */
+	ldi r20, 3
+	rcall memrotr32
+	/* X0 := X0 >>> 13 */
+	sbiw r26, 8	/* X -> X0 */
+	ldi r20, 7	/* 13 is split into 7 + 6 */
+	rcall memrotr32
+	ldi r20, 6
+	rcall memrotr32
+	ret
+
+/*
+#define GOLDEN_RATIO 0x9e3779b9l
+
+static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
+	uint32_t ret;
+	ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
+	ret = rotl32(ret, 11);
+	return ret;
+}
+*/
+/*
+ * param b is passed in r24:r25
+ * param i is passed in r22
+ * return value is returned in r22.r23.r24.r25
+ */
+ /* trashes:
+  *  r20-r25, r30-r31
+  */
+serpent_gen_w:
+	movw r30, r24	/* Z -> b[0] */
+	/* ^i^b[0]*/
+    ld r21, Z+
+    eor r22, r21	/* i is 8 bits wide, so it only affects the low byte */
+    ld r23, Z+	/* the upper three bytes start out as plain b[0] */
+    ld r24, Z+
+    ld r25, Z+
+    /* ^b[3]^b[5]^b[7] */
+    adiw r30, 4	/* skip b[1] */
+    ldi r20, 3	/* three words to fold in */
+1:    
+    adiw r30, 4	/* skip one word: together with the four loads this visits b[3], b[5], b[7] */
+    ld r21, Z+
+    eor r22, r21
+    ld r21, Z+
+    eor r23, r21
+    ld r21, Z+
+    eor r24, r21
+    ld r21, Z+
+    eor r25, r21
+	dec r20
+	brne 1b
+	/* ^0x9e3779b9l */
+	ldi r21, 0xb9
+	eor r22, r21
+	ldi r21, 0x79
+	eor r23, r21
+	ldi r21, 0x37
+	eor r24, r21
+	ldi r21, 0x9e
+	eor r25, r21
+	/* <<<11 */
+	mov r21, r25	/* first rotate left by 8 by moving whole bytes ... */
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	mov r21, r25	/* r21 := copy of the top byte, feeds the wrap-around bits */
+	ldi r20, 3	/* ... then rotate left by the remaining 3 bits */
+1:
+	lsl r21
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	dec r20
+	brne 1b
+	ret
+
+/*
+ * void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx)
+ */
+/*
+ * param key     is passed in r24:r25
+ * param keysize is passed in r22:r23 (key length in bits)
+ * param ctx     is passed in r20:r21
+ */
+.global serpent_init
+serpent_init:
+    stack_alloc 32	/* 32-byte key buffer on the stack; presumably leaves Z = SP (macro from avr-asm-macros.S) -- confirm */
+    adiw r30, 1	/* Z -> first byte of the buffer */
+	push_ r30, r31	/* remember the buffer address */
+    movw r26, r22
+    adiw r26, 7
+    tst r27
+    breq 1f
+	ldi r26, 32	/* keysize_b + 7 >= 256: use the full 32 bytes */
+	rjmp 2f
+1:
+	lsr r26
+	lsr r26
+	lsr r26	/* r26 := (keysize_b + 7) / 8 */
+2:	
+	mov r23, r26	/* byte counter lives in r23 so that r22 keeps the low byte of keysize_b for the padding step */
+	bst r23, 5 /* T := 1 if all 32 bytes are key material, i.e. the "append 1 thing" is skipped */
+	ldi r27, 32
+3:	/* set buffer to zero */
+	st Z+, r1	/* r1 is expected to hold zero */
+	dec r27
+	brne 3b
+	
+	movw r26, r24 /* X points to the key */
+	sbiw r30, 32	/* Z back to the start of the buffer */
+	tst r23
+	breq 5f /* if keylength_b==0 */
+4:	/* copy keybytes to buffer */
+	ld r19, X+
+	st Z+, r19
+	dec r23
+	brne 4b
+5:
+	brts 7f /* 32 key bytes copied, no padding; NOTE(review): keysize_b 249..255 also ends up here and is treated like a 256-bit key */
+	ldi r18, 0x01
+	andi r22, 0x07	/* r22 := keysize_b % 8 (r22 still holds the low byte of keysize_b) */
+	brne 6f
+	st Z, r18	/* byte-aligned key: the padding bit opens a new byte */
+	rjmp 7f
+6:	/* shift the one to the right position */
+	lsl r18
+	dec r22
+	brne 6b
+	or r18, r19	/* r19 still holds the last, partially used key byte */
+	st -Z, r18
+7: /* post "appending 1 thing" buffer is ready for subkey generation */
+	movw r26, r20  /* X points to the context */
+	
+	pop_ r19, r18 /* r18:r19 points to the buffer */
+	push r16
+	clr r16	/* r16 := index of the word being generated (0..131) */
+8:
+	movw r24, r18
+	mov  r22, r16
+	rcall serpent_gen_w	/* new word in r22..r25; X, r16, r18, r19 are not touched */
+	movw r30, r18
+	ldi r20, 7*4
+1: /* the memmove */
+	ldd r0, Z+4
+	st Z+, r0
+	dec r20
+	brne 1b
+  /* store new word in buffer and context */	
+	st Z+, r22
+	st Z+, r23
+	st Z+, r24
+	st Z+, r25
+	st X+, r22
+	st X+, r23
+	st X+, r24
+	st X+, r25
+	
+	inc r16
+	cpi r16, 132	/* 33 subkeys of 4 words each */
+	brne 8b	
+	
+	push_ r28, r29
+	movw r28, r26
+	subi r28, lo8(132*4)	/* Y := X - 528, i.e. back to the first subkey */
+	sbci r29, hi8(132*4)
+	ldi r16, 33
+2:
+	movw r24, r28
+	adiw r28, 16
+	ldi r22, 2
+	add r22, r16	/* sbox128 reduces this mod 8: the boxes run 3, 2, 1, 0, 7, 6, ... */
+	rcall sbox128
+	dec r16
+	brne 2b
+	pop_ r29, r28, r16
+	stack_free 32
+	ret
+
+/*
+ * void serpent_enc(void* buffer, const serpent_ctx_t* ctx){
+ */
+/*
+ * param buffer is passed in r24:r25
+ * param ctx    is passed in r22:r23
+ */
+.global serpent_enc
+serpent_enc:
+
+	push_ r12, r13, r14, r15, r16 
+	clr r16	/* r16 := round counter */
+	movw r14, r24	/* r14:r15 := buffer */
+	movw r12, r22	/* r12:r13 := pointer to the current 16-byte subkey */
+1:
+	movw r24, r14
+	movw r22, r12
+	ldi r20, 16
+	add r12, r20	/* advance the subkey pointer for the next round */
+	adc r13, r1	/* r1 is expected to hold zero */
+	clr r21	/* r20:r21 := 16, the length argument of memxor */
+	rcall memxor
+	movw r24, r14
+	mov r22, r16
+	rcall sbox128
+	movw r24, r14
+	rcall serpent_lt
+	
+	inc r16
+	cpi r16, 31
+	brne 1b	/* rounds 0..30 include the linear transformation */
+	
+	movw r24, r14	/* round 31: key mixing and S-box only */
+	movw r22, r12
+	ldi r20, 16
+	add r12, r20
+	adc r13, r1
+	clr r21
+	rcall memxor
+	movw r24, r14
+	mov r22, r16
+	rcall sbox128
+	
+	inc r16	/* has no lasting effect: r16 is restored by the pop_ below */
+	movw r24, r14
+	movw r22, r12
+	ldi r20, 16
+	clr r21
+	pop_ r16, r15, r14, r13, r12
+	rjmp memxor	/* tail call: final XOR with the last subkey, memxor returns to our caller */
+
+/*
+ * void serpent_dec(void* buffer, const serpent_ctx_t* ctx){
+ */
+/*
+ * param buffer is passed in r24:r25
+ * param ctx    is passed in r22:r23
+ */
+.global serpent_dec
+serpent_dec:
+	push_ r12, r13, r14, r15, r16 
+	movw r14, r24	/* r14:r15 := buffer */
+//	ldi r16, lo8(32*16)
+//	add r22, r16
+	ldi r16, hi8(32*16)	/* 32*16 = 0x0200: the low byte is zero, so only the high byte of the pointer needs adjusting */
+	add r23, r16	/* r22:r23 -> subkey 32 */
+	movw r12, r22	/* r12:r13 := pointer to the current 16-byte subkey */
+	ldi r20, 16
+	clr r21	/* r20:r21 := 16, the length argument of memxor */
+	rcall memxor
+	
+	movw r24, r14
+	ldi r22, 31
+	call inv_sbox128
+	
+	movw r24, r14
+	ldi r20, 16
+	sub r12, r20	/* step back to subkey 31 */
+	sbc r13, r1	/* r1 is expected to hold zero */
+	movw r22, r12
+	clr r21
+	rcall memxor
+	ldi r16, 31
+1:
+	dec r16	/* rounds 30 down to 0 */
+	movw r24, r14
+	rcall serpent_inv_lt
+	movw r24, r14
+	mov r22, r16
+	rcall inv_sbox128
+	movw r24, r14
+	ldi r20, 16
+	sub r12, r20	/* step back one subkey */
+	sbc r13, r1
+	movw r22, r12
+	clr r21
+	rcall memxor
+	
+	tst r16
+	brne 1b
+	pop_ r16, r15, r14, r13, r12
+	ret	
+	
+	
+	
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/serpent/serpent-sboxes-bitslice-asm.S b/serpent/serpent-sboxes-bitslice-asm.S
new file mode 100644
index 0000000..4e0d7db
--- /dev/null
+++ b/serpent/serpent-sboxes-bitslice-asm.S
@@ -0,0 +1,854 @@
+/* serpent-sboxes-bitslice-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* serpent-sboxes.c
+ * a bitsliced implementation of the serpent sboxes
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include "avr-asm-macros.S"
+IN0  = 22	/* IN0..IN3: register numbers holding the four input bytes of an S-box routine */
+IN1  = 23
+IN2  = 24
+IN3  = 25
+OUT0 = 18	/* OUT0..OUT3: register numbers receiving the four output bytes */
+OUT1 = 19
+OUT2 = 20
+OUT3 = 21
+T00 =  2	/* NOTE(review): overridden by the next line, so T00 ends up as r3 and r2 is never used as a temporary */
+T00 =  3	/* T00..T17: scratch registers for intermediate gate values */
+T01 =  4
+T02 =  5
+T03 =  6
+T04 =  7
+T05 =  8
+T06 =  9
+T07 = 10
+T08 = 11
+T09 = 12
+T10 = 13
+T11 = 14
+T12 = 15
+T13 = 16
+T14 = 17
+T15 = 26	/* T15/T16 are the X pointer registers r26/r27 */
+T16 = 27
+T17 =  0	/* r0 */
+
+/* S0:   3  8 15  1 10  6  5 11 14 13  4  2  7  0  9 12 */
+
+/* depth = 5,7,4,2, Total gates=18 */
+sb0:
+    mov T00, IN1
+    eor T00, IN2
+    mov T01, IN0
+    or  T01, IN3
+    mov T02, IN0
+    eor T02, IN1
+    mov OUT3, T01
+    eor OUT3, T00
+    mov T04, IN2
+    or  T04, OUT3
+    mov T05, IN0
+    eor T05, IN3
+    mov T06, IN1
+    or  T06, IN2
+    mov T07, IN3
+    and T07, T04
+    mov T08, T02
+    and T08, T06
+    mov OUT2, T08
+    eor OUT2, T07
+    mov T10, T08
+    and T10, OUT2
+    mov T11, IN2
+    eor T11, IN3
+    mov T12, T06
+    eor T12, T10
+    mov T13, IN1
+    and T13, T05
+    mov T14, T05
+    eor T14, T12
+	mov OUT0, T14
+	com OUT0
+	mov T16, OUT0
+    eor T16, T13
+    mov OUT1, T11
+    eor OUT1, T16
+	ret
+
+	
+/* InvS0:  13  3 11  0 10  6  5 12  1 14  4  7 15  9  8  2 */
+
+/* depth = 8,4,3,6, Total gates=19 */
+sb0_inv:
+    mov T00, IN2
+    eor T00, IN3
+    mov T01, IN0
+    or  T01, IN1
+    mov T02, IN1
+    or  T02, IN2
+    mov T03, IN2
+    and T03, T00
+    mov T04, T01
+    eor T04, T00
+    mov T05, IN0
+    or  T05, T03
+	mov OUT2, T04
+	com OUT2
+    mov T07, IN1
+    eor T07, IN3
+    mov T08, T02
+    and T08, T07
+    mov T09, IN3
+    or  T09, OUT2
+    mov OUT1, T08
+    eor OUT1, T05
+    mov T11, IN0
+    or  T11, T04
+    mov T12, OUT1
+    eor T12, T11
+    mov T13, T02
+    eor T13, T09
+    mov T14, IN0
+    eor T14, IN2
+    mov OUT3, T13
+    eor OUT3, T12
+    mov T16, T04
+    and T16, T12
+    mov T17, T13
+    or  T17, T16
+    mov OUT0, T14
+    eor OUT0, T17
+	ret
+
+
+/* S1:  15 12  2  7  9  0  5 10  1 11 14  8  6 13  3  4 */
+
+/* depth = 10,7,3,5, Total gates=18 */
+sb1:
+    mov T00, IN0
+    or  T00, IN3
+    mov T01, IN2
+    eor T01, IN3
+	mov T02, IN1
+	com T02
+    mov T03, IN0
+    eor T03, IN2
+    mov T04, IN0
+    or  T04, T02
+    mov T05, IN3
+    and T05, T03
+    mov T06, T00
+    and T06, T01
+    mov T07, IN1
+    or  T07, T05
+    mov OUT2, T01
+    eor OUT2, T04
+    mov T09, T06
+    eor T09, T07
+    mov T10, T00
+    eor T10, T09
+    mov T11, OUT2
+    eor T11, T10
+    mov T12, IN1
+    and T12, IN3
+	mov OUT3, T09
+	com OUT3
+    mov OUT1, T12
+    eor OUT1, T11
+    mov T15, T09
+    or  T15, OUT1
+    mov T16, T04
+    and T16, T15
+    mov OUT0, IN2
+    eor OUT0, T16
+	ret
+
+
+/* InvS1:   5  8  2 14 15  6 12  3 11  4  7  9  1 13 10  0 */
+
+/* depth = 7,4,5,3, Total gates=18 */
+sb1_inv:
+    mov T00, IN0
+    eor T00, IN1
+    mov T01, IN1
+    or  T01, IN3
+    mov T02, IN0
+    and T02, IN2
+    mov T03, IN2
+    eor T03, T01
+    mov T04, IN0
+    or  T04, T03
+    mov T05, T00
+    and T05, T04
+    mov T06, IN3
+    or  T06, T02
+    mov T07, IN1
+    eor T07, T05
+    mov T08, T06
+    eor T08, T05
+    mov T09, T03
+    or  T09, T02
+    mov T10, IN3
+    and T10, T07
+    mov OUT2, T08
+    com OUT2
+	mov OUT1, T09
+    eor OUT1, T10
+    mov T13, IN0
+    or  T13, OUT2
+    mov T14, T05
+    eor T14, OUT1
+    mov OUT3, T00
+    eor OUT3, T03
+    mov T16, IN2
+    eor T16, T14
+    mov OUT0, T13
+    eor OUT0, T16
+	ret
+
+/* S2:   8  6  7  9  3 12 10 15 13  1 14  4  0 11  5  2 */
+
+/* depth = 3,8,11,7, Total gates=16 */
+sb2:
+    mov T00, IN0
+    or  T00, IN2
+    mov T01, IN0
+    eor T01, IN1
+    mov T02, IN3
+    eor T02, T00
+    mov OUT0, T01
+    eor OUT0, T02
+    mov T04, IN2
+    eor T04, OUT0
+    mov T05, IN1
+    eor T05, T04
+    mov T06, IN1
+    or  T06, T04
+    mov T07, T00
+    and T07, T05
+    mov T08, T02
+    eor T08, T06
+    mov T09, T01
+    or  T09, T08
+    mov OUT1, T09
+    eor OUT1, T07
+    mov T11, IN0
+    or  T11, IN3
+    mov T12, T08
+    eor T12, OUT1
+    mov T13, IN1
+    eor T13, T12
+	mov OUT3, T08
+	com OUT3
+    mov OUT2, T11
+    eor OUT2, T13
+	ret
+
+/* InvS2:  12  9 15  4 11 14  1  2  0  3  6 13  5  8 10  7 */
+
+/* depth = 3,6,8,3, Total gates=18 */
+sb2_inv:
+    mov T00, IN0
+    eor T00, IN3
+    mov T01, IN2
+    eor T01, IN3
+    mov T02, IN0
+    and T02, IN2
+    mov T03, IN1
+    or  T03, T01
+    mov OUT0, T00
+    eor OUT0, T03
+    mov T05, IN0
+    or  T05, IN2
+    mov T06, IN3
+    or  T06, OUT0
+	mov T07, IN3
+	com T07
+    mov T08, IN1
+    and T08, T05
+    mov T09, T07
+    or  T09, T02
+    mov T10, IN1
+    and T10, T06
+    mov T11, T05
+    and T11, T01
+    mov OUT3, T08
+    eor OUT3, T09
+    mov OUT1, T11
+    eor OUT1, T10
+    mov T14, IN2
+    and T14, OUT3
+    mov T15, OUT0
+    eor T15, OUT1
+    mov T16, T09
+    eor T16, T14
+    mov OUT2, T15
+    eor OUT2, T16
+	ret
+
+/* S3:   0 15 11  8 12  9  6  3 13  1  2  4 10  7  5 14 */
+
+/* depth = 8,3,5,5, Total gates=18 */
+sb3:
+    mov T00, IN0
+    eor T00, IN2
+    mov T01, IN0
+    or  T01, IN3
+    mov T02, IN0
+    and T02, IN3
+    mov T03, T00
+    and T03, T01
+    mov T04, IN1
+    or  T04, T02
+    mov T05, IN0
+    and T05, IN1
+    mov T06, IN3
+    eor T06, T03
+    mov T07, IN2
+    or  T07, T05
+    mov T08, IN1
+    eor T08, T06
+    mov T09, IN3
+    and T09, T04
+    mov T10, T01
+    eor T10, T09
+    mov OUT3, T07
+    eor OUT3, T08
+    mov T12, IN3
+    or  T12, OUT3
+    mov T13, IN0
+    or  T13, T06
+    mov T14, IN1
+    and T14, T12
+    mov OUT2, T07
+    eor OUT2, T10
+    mov OUT0, T13
+    eor OUT0, T14
+    mov OUT1, T04
+    eor OUT1, T03
+	ret
+
+/* InvS3:   0  9 10  7 11 14  6 13  3  5 12  2  4  8 15  1 */
+
+/* depth = 3,6,4,4, Total gates=17 */
+sb3_inv:
+    mov T00, IN2
+    or  T00, IN3
+    mov T01, IN0
+    or  T01, IN3
+    mov T02, IN2
+    eor T02, T01
+    mov T03, IN1
+    eor T03, T01
+    mov T04, IN0
+    eor T04, IN3
+    mov T05, T03
+    and T05, T02
+    mov T06, IN1
+    and T06, T00
+    mov OUT2, T04
+    eor OUT2, T05
+    mov T08, IN0
+    eor T08, T02
+    mov OUT0, T06
+    eor OUT0, T02
+    mov T10, OUT0
+    or  T10, T04
+    mov T11, T08
+    and T11, T10
+    mov T12, IN0
+    and T12, OUT2
+    mov T13, T00
+    eor T13, T04
+    mov OUT1, IN1
+    eor OUT1, T11
+    mov T15, IN1
+    or  T15, T12
+    mov OUT3, T13
+    eor OUT3, T15
+	ret
+
+/* S4:   1 15  8  3 12  0 11  6  2  5  4 10  9 14  7 13 */
+
+/* depth = 6,7,5,3, Total gates=19 */
+sb4:
+    mov T00, IN0
+    or  T00, IN1
+    mov T01, IN1
+    or  T01, IN2
+    mov T02, IN0
+    eor T02, T01
+    mov T03, IN1
+    eor T03, IN3
+    mov T04, IN3
+    or  T04, T02
+    mov T05, IN3
+    and T05, T00
+    mov OUT3, T02
+    eor OUT3, T05
+    mov T07, OUT3
+    and T07, T03
+    mov T08, T03
+    and T08, T04
+    mov T09, IN2
+    eor T09, T05
+    mov T10, IN1
+    and T10, IN2
+    mov T11, T03
+    eor T11, T07
+    mov T12, T10
+    or  T12, T02
+    mov T13, T09
+    eor T13, T08
+    mov T14, IN0
+    and T14, T04
+    mov T15, T10
+    or  T15, T11
+    mov OUT2, T12
+    eor OUT2, T07
+    mov OUT1, T14
+    eor OUT1, T15
+	mov OUT0, T13
+	com OUT0
+	ret
+
+/* InvS4:   5  0  8  3 10  9  7 14  2 12 11  6  4 15 13  1 */
+
+/* depth = 6,4,7,3, Total gates=17 */
+sb4_inv:
+    mov T00, IN1
+    or  T00, IN3
+    mov T01, IN2
+    or  T01, IN3
+    mov T02, IN0
+    and T02, T00
+    mov T03, IN1
+    eor T03, T01
+    mov T04, IN2
+    eor T04, IN3
+	mov T05, T02
+	com T05
+    mov T06, IN0
+    and T06, T03
+    mov OUT1, T04
+    eor OUT1, T06
+    mov T08, OUT1
+    or  T08, T05
+    mov T09, IN0
+    eor T09, T06
+    mov T10, T00
+    eor T10, T08
+    mov T11, IN3
+    eor T11, T03
+    mov T12, IN2
+    or  T12, T09
+    mov OUT3, T02
+    eor OUT3, T11
+    mov T14, IN0
+    eor T14, T03
+    mov OUT2, T10
+    eor OUT2, T12
+    mov OUT0, T14
+    eor OUT0, T08
+	ret
+
+/* S5:  15  5  2 11  4 10  9 12  0  3 14  8 13  6  7  1 */
+
+/* depth = 4,6,8,6, Total gates=17 */
+sb5:
+    mov T00, IN1
+    eor T00, IN3
+    mov T01, IN1
+    or  T01, IN3
+    mov T02, IN0
+    and T02, T00
+    mov T03, IN2
+    eor T03, T01
+    mov T04, T02
+    eor T04, T03
+	mov OUT0, T04
+	com OUT0
+    mov T06, IN0
+    eor T06, T00
+    mov T07, IN3
+    or  T07, OUT0
+    mov T08, IN1
+    or  T08, T04
+    mov T09, IN3
+    eor T09, T07
+    mov T10, IN1
+    or  T10, T06
+    mov T11, T02
+    or  T11, OUT0
+    mov T12, T06
+    or  T12, T09
+    mov T13, T00
+    eor T13, T10
+    mov OUT2, T08
+    eor OUT2, T12
+    mov OUT1, T06
+    eor OUT1, T07
+    mov OUT3, T11
+    eor OUT3, T13
+	ret
+
+/* InvS5:   8 15  2  9  4  1 13 14 11  6  5  3  7 12 10  0 */
+
+/* depth = 4,6,9,7, Total gates=17 */
+sb5_inv:
+    mov T00, IN0
+    and T00, IN3
+    mov T01, IN2
+    eor T01, T00
+    mov T02, IN0
+    eor T02, IN3
+    mov T03, IN1
+    and T03, T01
+    mov T04, IN0
+    and T04, IN2
+    mov OUT0, T02
+    eor OUT0, T03
+    mov T06, IN0
+    and T06, OUT0
+    mov T07, T00
+    eor T07, OUT0
+    mov T08, IN1
+    or  T08, T04
+	mov T09, IN1
+	com T09
+    mov OUT1, T07
+    eor OUT1, T08
+    mov T11, T09
+    or  T11, T06
+    mov T12, OUT0
+    or  T12, OUT1
+    mov OUT3, T01
+    eor OUT3, T11
+    mov T14, T01
+    eor T14, T12
+    mov T15, IN1
+    eor T15, IN3
+    mov OUT2, T15
+    eor OUT2, T14
+	ret
+
+/* S6:   7  2 12  5  8  4  6 11 14  9  1 15 13  3 10  0 */
+
+/* depth = 8,3,6,3, Total gates=19 */
+sb6:
+    mov T00, IN0
+    and T00, IN3
+    mov T01, IN1
+    eor T01, IN2
+    mov T02, IN0
+    eor T02, IN3
+    mov T03, T00
+    eor T03, T01
+    mov T04, IN1
+    or  T04, IN2
+	mov OUT1, T03
+	com OUT1
+    mov T06, T02
+    and T06, T04
+    mov T07, IN1
+    and T07, OUT1
+    mov T08, IN0
+    or  T08, IN2
+    mov T09, T06
+    eor T09, T07
+    mov T10, IN1
+    or  T10, IN3
+    mov T11, IN2
+    eor T11, T10
+    mov T12, T08
+    eor T12, T09
+	mov OUT2, T12
+	com OUT2
+    mov T14, OUT1
+    and T14, T02
+    mov OUT3, T11
+    eor OUT3, T06
+    mov T16, IN0
+    eor T16, IN1
+    mov T17, OUT2
+    eor T17, T14
+    mov OUT0, T16
+    eor OUT0, T17
+	ret
+
+/* InvS6:  15 10  1 13  5  3  6  0  4  9 14  7  2 12  8 11 */
+
+/* depth = 5,3,8,6, Total gates=19 */
+sb6_inv:
+    mov T00, IN0
+    eor T00, IN2
+	mov T01, IN2
+	com T01
+    mov T02, IN1
+    and T02, T00
+    mov T03, IN1
+    or  T03, T01
+    mov T04, IN3
+    or  T04, T02
+    mov T05, IN1
+    eor T05, IN3
+    mov T06, IN0
+    and T06, T03
+    mov T07, IN0
+    or  T07, T01
+    mov T08, T06
+    eor T08, T04
+    mov OUT1, T05
+    eor OUT1, T07
+	mov OUT0, T08
+	com OUT0
+    mov T11, IN1
+    and T11, OUT0
+    mov T12, T00
+    and T12, T04
+    mov T13, T00
+    eor T13, T11
+    mov T14, T06
+    eor T14, T12
+    mov T15, IN3
+    or  T15, T01
+    mov T16, IN0
+    eor T16, OUT1
+    mov OUT3, T16
+    eor OUT3, T14
+    mov OUT2, T15
+    eor OUT2, T13
+	ret
+
+/* S7:   1 13 15  0 14  8  2 11  7  4 12 10  9  3  5  6 */
+
+/* depth = 10,7,10,4, Total gates=19 */
+sb7:
+    mov T00, IN0
+    and T00, IN2
+	mov T01, IN3
+	com T01
+    mov T02, IN0
+    and T02, T01
+    mov T03, IN1
+    or  T03, T00
+    mov T04, IN0
+    and T04, IN1
+    mov T05, IN2
+    eor T05, T03
+    mov OUT3, T02
+    eor OUT3, T05
+    mov T07, IN2
+    or  T07, OUT3
+    mov T08, IN3
+    or  T08, T04
+    mov T09, IN0
+    eor T09, T07
+    mov T10, T03
+    and T10, OUT3
+    mov OUT1, T08
+    eor OUT1, T09
+    mov T12, IN1
+    eor T12, OUT1
+    mov T13, T00
+    eor T13, OUT1
+    mov T14, IN2
+    eor T14, T04
+    mov T15, T10
+    or  T15, T12
+    mov T16, T01
+    or  T16, T13
+    mov OUT0, T14
+    eor OUT0, T16
+    mov OUT2, IN0
+    eor OUT2, T15
+	ret
+
+/* InvS7:   3  0  6 13  9 14 15  8  5 12 11  7 10  1  4  2 */
+
+/* depth = 9,7,3,3, Total gates=18 */
+sb7_inv:
+    mov T00, IN0
+    and T00, IN1
+    mov T01, IN0
+    or  T01, IN1
+    mov T02, IN2
+    or  T02, T00
+    mov T03, IN3
+    and T03, T01
+    mov OUT3, T02
+    eor OUT3, T03
+    mov T05, IN1
+    eor T05, T03
+    mov T06, IN3
+    eor T06, OUT3
+    mov T07, T06
+    com T07
+    mov T08, T05
+    or  T08, T07
+    mov T09, IN1
+    eor T09, IN3
+    mov T10, IN0
+    or  T10, IN3
+    mov OUT1, IN0
+    eor OUT1, T08
+    mov T12, IN2
+    eor T12, T05
+    mov T13, IN2
+    and T13, T10
+    mov T14, IN3
+    or  T14, OUT1
+    mov T15, T00
+    or  T15, T09
+    mov OUT0, T12
+    eor OUT0, T14
+    mov OUT2, T13
+    eor OUT2, T15
+	ret
+
+sf_tab:
+.word sb0, sb1, sb2, sb3
+.word sb4, sb5, sb6, sb7
+
+sinvf_tab:
+.word sb0_inv, sb1_inv, sb2_inv, sb3_inv
+.word sb4_inv, sb5_inv, sb6_inv, sb7_inv
+
+/*
+.byte pm_lo8(sb0), pm_hi8(sb0)
+.byte pm_lo8(sb1), pm_hi8(sb1)
+.byte pm_lo8(sb2), pm_hi8(sb2)
+.byte pm_lo8(sb3), pm_hi8(sb3)
+.byte pm_lo8(sb4), pm_hi8(sb4)
+.byte pm_lo8(sb5), pm_hi8(sb5)
+.byte pm_lo8(sb6), pm_hi8(sb6)
+.byte pm_lo8(sb7), pm_hi8(sb7)
+
+
+sinvf_tab:
+.byte pm_lo8(sb0_inv), pm_hi8(sb0_inv)
+.byte pm_lo8(sb1_inv), pm_hi8(sb1_inv)
+.byte pm_lo8(sb2_inv), pm_hi8(sb2_inv)
+.byte pm_lo8(sb3_inv), pm_hi8(sb3_inv)
+.byte pm_lo8(sb4_inv), pm_hi8(sb4_inv)
+.byte pm_lo8(sb5_inv), pm_hi8(sb5_inv)
+.byte pm_lo8(sb6_inv), pm_hi8(sb6_inv)
+.byte pm_lo8(sb7_inv), pm_hi8(sb7_inv)
+*/
+/*
+void sbox128(void * w, uint8_t box){
+	uint8_t i, buffer[16];
+	box &= 0x7;
+	
+	sb_fpt fp;
+	fp = (sb_fpt)pgm_read_word(&(sf_tab[box]));
+	for(i=0; i<4; ++i){
+		fp(buffer+i, (uint8_t*)w+i);
+	}
+	memcpy(w, buffer, 16);
+}
+*/
+.global sbox128
+sbox128:	/* void sbox128(void *w, uint8_t box): w in r24:r25, box in r22 */
+	ldi r30, lo8(sf_tab)
+	ldi r31, hi8(sf_tab)
+1:	/* shared body: inv_sbox128 jumps here with Z -> sinvf_tab */
+;	clr r1
+	andi r22, 0x07	/* box := box mod 8, the tables have eight entries */
+	lsl r22	/* table entries are 2 bytes wide */
+	add r30, r22
+	adc r31, r1	/* r1 is expected to hold zero (the clr above is commented out) */
+	lpm r26, Z+	/* fetch the routine address from program memory */
+	lpm r27, Z
+	lsr r27	/* halve it: presumably .word stored a byte address while icall takes a word address -- confirm */
+	ror r26
+	push r28
+	push r29
+	movw r30, r26	/* Z := call target for icall */
+	movw r28, r24	/* Y := w */
+	push_range 2, 17
+	ldd IN0, Y+0	/* pass 1 of 4: byte 0 of each of the four words at offsets 0, 4, 8, 12 */
+	ldd IN1, Y+4
+	ldd IN2, Y+8
+	ldd IN3, Y+12
+	icall 
+	std Y+0, OUT0
+	std Y+4, OUT1
+	std Y+8, OUT2
+	std Y+12, OUT3
+	ldd IN0, Y+0+1	/* pass 2 of 4: byte 1 of each word */
+	ldd IN1, Y+4+1
+	ldd IN2, Y+8+1
+	ldd IN3, Y+12+1
+	icall
+	std Y+0+1, OUT0
+	std Y+4+1, OUT1
+	std Y+8+1, OUT2
+	std Y+12+1, OUT3
+	ldd IN0, Y+0+2	/* pass 3 of 4: byte 2 of each word */
+	ldd IN1, Y+4+2
+	ldd IN2, Y+8+2
+	ldd IN3, Y+12+2
+	icall
+	std Y+0+2, OUT0
+	std Y+4+2, OUT1
+	std Y+8+2, OUT2
+	std Y+12+2, OUT3
+	ldd IN0, Y+0+3	/* pass 4 of 4: byte 3 of each word */
+	ldd IN1, Y+4+3
+	ldd IN2, Y+8+3
+	ldd IN3, Y+12+3
+	icall
+	std Y+0+3, OUT0
+	std Y+4+3, OUT1
+	std Y+8+3, OUT2
+	std Y+12+3, OUT3
+	pop_range 2, 17
+	pop r29
+	pop r28
+	ret
+	
+.global	inv_sbox128
+inv_sbox128:	/* void inv_sbox128(void *w, uint8_t box): same register interface as sbox128 */
+	ldi r30, lo8(sinvf_tab)
+	ldi r31, hi8(sinvf_tab)
+	rjmp 1b	/* continue in the shared body of sbox128 */
+/*	
+void inv_sbox128(void * w, uint8_t box){
+	uint8_t i, buffer[16];
+	box &= 0x7;
+	
+	sb_fpt fp;
+	fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box]));
+	for(i=0; i<4; ++i){
+		fp(buffer+i, (uint8_t*)w+i);
+	}
+	memcpy(w, buffer, 16);
+}
+*/
+
+
+
+
+
+
+
diff --git a/serpent/serpent-sboxes-bitslice.c b/serpent/serpent-sboxes-bitslice.c
new file mode 100644
index 0000000..96f9c68
--- /dev/null
+++ b/serpent/serpent-sboxes-bitslice.c
@@ -0,0 +1,479 @@
+/* serpent-sboxes-bitslice.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* serpent-sboxes-bitslice.c
+ * a bitsliced implementation of the serpent sboxes
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <avr/pgmspace.h>
+#include "serpent-sboxes.h"
+
+
+
+
+/* S0:   3  8 15  1 10  6  5 11 14 13  4  2  7  0  9 12 */
+/* sb0: gate network for S0. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 5,7,4,2, Total gates=18 */
+static
+void sb0(uint8_t* out, const uint8_t* in){
+// (a,b,c,d,w,x,y,z) 
+	uint8_t t01, t02, t03, t05, t06, t07, t08, t09, t11, t12, t13, t14, t15, t17;
+	t01          = in[4*1]  ^ in[4*2]; 
+	t02          = in[4*0]  | in[4*3]; 
+	t03          = in[4*0]  ^ in[4*1]; 
+	out[4*3]   = t02        ^ t01; 
+	t05          = in[4*2]  | out[4*3]; 
+	t06          = in[4*0]  ^ in[4*3]; 
+	t07          = in[4*1]  | in[4*2]; 
+	t08          = in[4*3]  & t05; 
+	t09          = t03        & t07; 
+	out[4*2]   = t09        ^ t08; 
+	t11          = t09        & out[4*2]; 
+	t12          = in[4*2]  ^ in[4*3]; 
+	t13          = t07        ^ t11; 
+	t14          = in[4*1]  & t06; 
+	t15          = t06        ^ t13; 
+	out[4*0]   =     ~ t15; 
+	t17          = out[4*0] ^ t14; 
+	out[4*1]   = t12        ^ t17; 
+}
+
+/* InvS0:  13  3 11  0 10  6  5 12  1 14  4  7 15  9  8  2 */
+/* sb0_inv: gate network for the inverse of S0. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 8,4,3,6, Total gates=19 */
+static
+void sb0_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t12, t13, t14, t15, t17, t18, t01;
+	t01          = in[4*2] ^ in[4*3];
+	t02          = in[4*0] | in[4*1];
+	t03          = in[4*1] | in[4*2];
+	t04          = in[4*2] & t01;
+	t05          = t02       ^ t01;
+	t06          = in[4*0] | t04;
+	out[4*2]   =     ~ t05;
+	t08          = in[4*1]   ^ in[4*3];
+	t09          = t03       & t08;
+	t10          = in[4*3]   | out[4*2];
+	out[4*1]   = t09       ^ t06;
+	t12          = in[4*0]   | t05;
+	t13          = out[4*1]   ^ t12;
+	t14          = t03       ^ t10;
+	t15          = in[4*0]   ^ in[4*2];
+	out[4*3]   = t14       ^ t13;
+	t17          = t05       & t13;
+	t18          = t14       | t17;
+	out[4*0]   = t15       ^ t18; 
+}
+
+/* S1:  15 12  2  7  9  0  5 10  1 11 14  8  6 13  3  4 */
+/* sb1: gate network for S1. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 10,7,3,5, Total gates=18 */
+static 
+void sb1(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t08, t10, t11, t12, t13, t16, t17, t01;
+	t01          = in[4*0]   | in[4*3];
+	t02          = in[4*2]   ^ in[4*3];
+	t03          =     ~ in[4*1];
+	t04          = in[4*0]   ^ in[4*2];
+	t05          = in[4*0]   | t03;
+	t06          = in[4*3]   & t04;
+	t07          = t01       & t02;
+	t08          = in[4*1]   | t06;
+	out[4*2]   = t02       ^ t05;
+	t10          = t07       ^ t08;
+	t11          = t01       ^ t10;
+	t12          = out[4*2]   ^ t11;
+	t13          = in[4*1]   & in[4*3];
+	out[4*3]   =     ~ t10;
+	out[4*1]   = t13       ^ t12;
+	t16          = t10       | out[4*1];
+	t17          = t05       & t16;
+	out[4*0]   = in[4*2]   ^ t17; 
+}
+
+/* InvS1:   5  8  2 14 15  6 12  3 11  4  7  9  1 13 10  0 */
+/* sb1_inv: gate network for the inverse of S1. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 7,4,5,3, Total gates=18 */
+static void sb1_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t14, t15, t17, t01;
+	t01          = in[4*0]   ^ in[4*1];
+	t02          = in[4*1]   | in[4*3];
+	t03          = in[4*0]   & in[4*2];
+	t04          = in[4*2]   ^ t02;
+	t05          = in[4*0]   | t04;
+	t06          = t01       & t05;
+	t07          = in[4*3]   | t03;
+	t08          = in[4*1]   ^ t06;
+	t09          = t07       ^ t06;
+	t10          = t04       | t03;
+	t11          = in[4*3]   & t08;
+	out[4*2]   =     ~ t09;
+	out[4*1]   = t10       ^ t11;
+	t14          = in[4*0]   | out[4*2];
+	t15          = t06       ^ out[4*1];
+	out[4*3]   = t01       ^ t04;
+	t17          = in[4*2]   ^ t15;
+	out[4*0]   = t14       ^ t17; 
+}
+
+/* S2:   8  6  7  9  3 12 10 15 13  1 14  4  0 11  5  2 */
+/* sb2: gate network for S2. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 3,8,11,7, Total gates=16 */
+static void sb2(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t05, t06, t07, t08, t09, t10, t12, t13, t14, t01;
+	t01          = in[4*0]   | in[4*2];
+	t02          = in[4*0]   ^ in[4*1];
+	t03          = in[4*3]   ^ t01;
+	out[4*0]   = t02       ^ t03;
+	t05          = in[4*2]   ^ out[4*0];
+	t06          = in[4*1]   ^ t05;
+	t07          = in[4*1]   | t05;
+	t08          = t01       & t06;
+	t09          = t03       ^ t07;
+	t10          = t02       | t09;
+	out[4*1]   = t10       ^ t08;
+	t12          = in[4*0]   | in[4*3];
+	t13          = t09       ^ out[4*1];
+	t14          = in[4*1]   ^ t13;
+	out[4*3]   =     ~ t09;
+	out[4*2]   = t12       ^ t14; 
+}
+
+/* InvS2:  12  9 15  4 11 14  1  2  0  3  6 13  5  8 10  7 */
+/* sb2_inv: gate network for the inverse of S2. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 3,6,8,3, Total gates=18 */
+static void sb2_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t12, t15, t16, t17, t01;
+	t01          = in[4*0]   ^ in[4*3];
+	t02          = in[4*2]   ^ in[4*3];
+	t03          = in[4*0]   & in[4*2];
+	t04          = in[4*1]   | t02;
+	out[4*0]   = t01       ^ t04;
+	t06          = in[4*0]   | in[4*2];
+	t07          = in[4*3]   | out[4*0];
+	t08          =     ~ in[4*3];
+	t09          = in[4*1]   & t06;
+	t10          = t08       | t03;
+	t11          = in[4*1]   & t07;
+	t12          = t06       & t02;
+	out[4*3]   = t09       ^ t10;
+	out[4*1]   = t12       ^ t11;
+	t15          = in[4*2]   & out[4*3];
+	t16          = out[4*0]   ^ out[4*1];
+	t17          = t10       ^ t15;
+	out[4*2]   = t16       ^ t17; 
+}
+
+/* S3:   0 15 11  8 12  9  6  3 13  1  2  4 10  7  5 14 */
+/* sb3: gate network for S3. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 8,3,5,5, Total gates=18 */
+static void sb3(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t13, t14, t15, t01;
+	t01          = in[4*0]   ^ in[4*2];
+	t02          = in[4*0]   | in[4*3];
+	t03          = in[4*0]   & in[4*3];
+	t04          = t01       & t02;
+	t05          = in[4*1]   | t03;
+	t06          = in[4*0]   & in[4*1];
+	t07          = in[4*3]   ^ t04;
+	t08          = in[4*2]   | t06;
+	t09          = in[4*1]   ^ t07;
+	t10          = in[4*3]   & t05;
+	t11          = t02       ^ t10;
+	out[4*3]   = t08       ^ t09;
+	t13          = in[4*3]   | out[4*3];
+	t14          = in[4*0]   | t07;
+	t15          = in[4*1]   & t13;
+	out[4*2]   = t08       ^ t11;
+	out[4*0]   = t14       ^ t15;
+	out[4*1]   = t05       ^ t04; 
+}
+
+/* InvS3:   0  9 10  7 11 14  6 13  3  5 12  2  4  8 15  1 */
+/* sb3_inv: gate network for the inverse of S3. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 3,6,4,4, Total gates=17 */
+static void sb3_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t09, t11, t12, t13, t14, t16, t01;
+	t01          = in[4*2]   | in[4*3];
+	t02          = in[4*0]   | in[4*3];
+	t03          = in[4*2]   ^ t02;
+	t04          = in[4*1]   ^ t02;
+	t05          = in[4*0]   ^ in[4*3];
+	t06          = t04       & t03;
+	t07          = in[4*1]   & t01;
+	out[4*2]   = t05       ^ t06;
+	t09          = in[4*0]   ^ t03;
+	out[4*0]   = t07       ^ t03;
+	t11          = out[4*0]   | t05;
+	t12          = t09       & t11;
+	t13          = in[4*0]   & out[4*2];
+	t14          = t01       ^ t05;
+	out[4*1]   = in[4*1]   ^ t12;
+	t16          = in[4*1]   | t13;
+	out[4*3]   = t14       ^ t16; 
+}
+
+/* S4:   1 15  8  3 12  0 11  6  2  5  4 10  9 14  7 13 */
+/* sb4: gate network for S4. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 6,7,5,3, Total gates=19 */
+static void sb4(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t12, t13, t14, t15, t16, t01;
+	t01          = in[4*0]   | in[4*1];
+	t02          = in[4*1]   | in[4*2];
+	t03          = in[4*0]   ^ t02;
+	t04          = in[4*1]   ^ in[4*3];
+	t05          = in[4*3]   | t03;
+	t06          = in[4*3]   & t01;
+	out[4*3]   = t03       ^ t06;
+	t08          = out[4*3]   & t04;
+	t09          = t04       & t05;
+	t10          = in[4*2]   ^ t06;
+	t11          = in[4*1]   & in[4*2];
+	t12          = t04       ^ t08;
+	t13          = t11       | t03;
+	t14          = t10       ^ t09;
+	t15          = in[4*0]   & t05;
+	t16          = t11       | t12;
+	out[4*2]   = t13       ^ t08;
+	out[4*1]   = t15       ^ t16;
+	out[4*0]   =     ~ t14; 
+}
+
+/* InvS4:   5  0  8  3 10  9  7 14  2 12 11  6  4 15 13  1 */
+/* sb4_inv: gate network for the inverse of S4. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 6,4,7,3, Total gates=17 */
+static void sb4_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t09, t10, t11, t12, t13, t15, t01;
+	t01          = in[4*1]   | in[4*3];
+	t02          = in[4*2]   | in[4*3];
+	t03          = in[4*0]   & t01;
+	t04          = in[4*1]   ^ t02;
+	t05          = in[4*2]   ^ in[4*3];
+	t06          =     ~ t03;
+	t07          = in[4*0]   & t04;
+	out[4*1]   = t05       ^ t07;
+	t09          = out[4*1]   | t06;
+	t10          = in[4*0]   ^ t07;
+	t11          = t01       ^ t09;
+	t12          = in[4*3]   ^ t04;
+	t13          = in[4*2]   | t10;
+	out[4*3]   = t03       ^ t12;
+	t15          = in[4*0]   ^ t04;
+	out[4*2]   = t11       ^ t13;
+	out[4*0]   = t15       ^ t09; 
+}
+
+/* S5:  15  5  2 11  4 10  9 12  0  3 14  8 13  6  7  1 */
+/* sb5: gate network for S5. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 4,6,8,6, Total gates=17 */
+static void sb5(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t14, t01;
+	t01          = in[4*1]   ^ in[4*3];
+	t02          = in[4*1]   | in[4*3];
+	t03          = in[4*0]   & t01;
+	t04          = in[4*2]   ^ t02;
+	t05          = t03       ^ t04;
+	out[4*0]   =     ~ t05;
+	t07          = in[4*0]   ^ t01;
+	t08          = in[4*3]   | out[4*0];
+	t09          = in[4*1]   | t05;
+	t10          = in[4*3]   ^ t08;
+	t11          = in[4*1]   | t07;
+	t12          = t03       | out[4*0];
+	t13          = t07       | t10;
+	t14          = t01       ^ t11;
+	out[4*2]   = t09       ^ t13;
+	out[4*1]   = t07       ^ t08;
+	out[4*3]   = t12       ^ t14; 
+}
+
+/* InvS5:   8 15  2  9  4  1 13 14 11  6  5  3  7 12 10  0 */
+/* sb5_inv: gate network for the inverse of S5. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 4,6,9,7, Total gates=17 */
+static void sb5_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t12, t13, t15, t16, t01;
+	t01          = in[4*0]   & in[4*3];
+	t02          = in[4*2]   ^ t01;
+	t03          = in[4*0]   ^ in[4*3];
+	t04          = in[4*1]   & t02;
+	t05          = in[4*0]   & in[4*2];
+	out[4*0]   = t03       ^ t04;
+	t07          = in[4*0]   & out[4*0];
+	t08          = t01       ^ out[4*0];
+	t09          = in[4*1]   | t05;
+	t10          =     ~ in[4*1];
+	out[4*1]   = t08       ^ t09;
+	t12          = t10       | t07;
+	t13          = out[4*0]   | out[4*1];
+	out[4*3]   = t02       ^ t12;
+	t15          = t02       ^ t13;
+	t16          = in[4*1]   ^ in[4*3];
+	out[4*2]   = t16       ^ t15; 
+}
+
+/* S6:   7  2 12  5  8  4  6 11 14  9  1 15 13  3 10  0 */
+/* sb6: gate network for S6. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 8,3,6,3, Total gates=19 */
+static void sb6(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t07, t08, t09, t10, t11, t12, t13, t15, t17, t18, t01;
+	t01          = in[4*0]   & in[4*3];
+	t02          = in[4*1]   ^ in[4*2];
+	t03          = in[4*0]   ^ in[4*3];
+	t04          = t01       ^ t02;
+	t05          = in[4*1]   | in[4*2];
+	out[4*1]   =     ~ t04;
+	t07          = t03       & t05;
+	t08          = in[4*1]   & out[4*1];
+	t09          = in[4*0]   | in[4*2];
+	t10          = t07       ^ t08;
+	t11          = in[4*1]   | in[4*3];
+	t12          = in[4*2]   ^ t11;
+	t13          = t09       ^ t10;
+	out[4*2]   =     ~ t13;
+	t15          = out[4*1]   & t03;
+	out[4*3]   = t12       ^ t07;
+	t17          = in[4*0]   ^ in[4*1];
+	t18          = out[4*2]   ^ t15;
+	out[4*0]   = t17       ^ t18; 
+}
+
+/* InvS6:  15 10  1 13  5  3  6  0  4  9 14  7  2 12  8 11 */
+/* sb6_inv: gate network for the inverse of S6. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 5,3,8,6, Total gates=19 */
+static void sb6_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t07, t08, t09, t12, t13, t14, t15, t16, t17, t01;
+	t01          = in[4*0]   ^ in[4*2];
+	t02          =     ~ in[4*2];
+	t03          = in[4*1]   & t01;
+	t04          = in[4*1]   | t02;
+	t05          = in[4*3]   | t03;
+	t06          = in[4*1]   ^ in[4*3];
+	t07          = in[4*0]   & t04;
+	t08          = in[4*0]   | t02;
+	t09          = t07       ^ t05;
+	out[4*1]   = t06       ^ t08;
+	out[4*0]   =     ~ t09;
+	t12          = in[4*1]   & out[4*0];
+	t13          = t01       & t05;
+	t14          = t01       ^ t12;
+	t15          = t07       ^ t13;
+	t16          = in[4*3]   | t02;
+	t17          = in[4*0]   ^ out[4*1];
+	out[4*3]   = t17       ^ t15;
+	out[4*2]   = t16       ^ t14; 
+}
+
+/* S7:   1 13 15  0 14  8  2 11  7  4 12 10  9  3  5  6 */
+/* sb7: gate network for S7. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 10,7,10,4, Total gates=19 */
+static void sb7(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t05, t06, t08, t09, t10, t11, t13, t14, t15, t16, t17, t01;
+	t01          = in[4*0]   & in[4*2];
+	t02          =     ~ in[4*3];
+	t03          = in[4*0]   & t02;
+	t04          = in[4*1]   | t01;
+	t05          = in[4*0]   & in[4*1];
+	t06          = in[4*2]   ^ t04;
+	out[4*3]   = t03       ^ t06;
+	t08          = in[4*2]   | out[4*3];
+	t09          = in[4*3]   | t05;
+	t10          = in[4*0]   ^ t08;
+	t11          = t04       & out[4*3];
+	out[4*1]   = t09       ^ t10;
+	t13          = in[4*1]   ^ out[4*1];
+	t14          = t01       ^ out[4*1];
+	t15          = in[4*2]   ^ t05;
+	t16          = t11       | t13;
+	t17          = t02       | t14;
+	out[4*0]   = t15       ^ t17;
+	out[4*2]   = in[4*0]   ^ t16; 
+}
+
+/* InvS7:   3  0  6 13  9 14 15  8  5 12 11  7 10  1  4  2 */
+/* sb7_inv: gate network for the inverse of S7. Reads in[0], in[4], in[8], in[12]; writes out[0], out[4], out[8], out[12]. out must not overlap in: outputs are stored before the last input read. */
+/* depth = 9,7,3,3, Total gates=18 */
+static void sb7_inv(uint8_t* out, const uint8_t* in){
+	uint8_t t02, t03, t04, t06, t07, t08, t09, t10, t11, t13, t14, t15, t16, t01;
+	t01          = in[4*0]   & in[4*1];
+	t02          = in[4*0]   | in[4*1];
+	t03          = in[4*2]   | t01;
+	t04          = in[4*3]   & t02;
+	out[4*3]   = t03       ^ t04;
+	t06          = in[4*1]   ^ t04;
+	t07          = in[4*3]   ^ out[4*3];
+	t08          =     ~ t07;
+	t09          = t06       | t08;
+	t10          = in[4*1]   ^ in[4*3];
+	t11          = in[4*0]   | in[4*3];
+	out[4*1]   = in[4*0]   ^ t09;
+	t13          = in[4*2]   ^ t06;
+	t14          = in[4*2]   & t11;
+	t15          = in[4*3]   | out[4*1];
+	t16          = t01       | t10;
+	out[4*0]   = t13       ^ t15;
+	out[4*2]   = t14       ^ t16; 
+}
+
+typedef void(*sb_fpt)(uint8_t*, const uint8_t*);
+/* Forward S-box handlers indexed by box number 0..7; const because avr-gcc only places const objects in PROGMEM. */
+const sb_fpt sf_tab[] PROGMEM = {
+	sb0, sb1, sb2, sb3, 
+	sb4, sb5, sb6, sb7
+};
+
+const sb_fpt sinvf_tab[] PROGMEM = { /* inverse S-box handlers, box 0..7; const is required for PROGMEM by current avr-gcc */
+	sb0_inv, sb1_inv, sb2_inv, sb3_inv, 
+	sb4_inv, sb5_inv, sb6_inv, sb7_inv
+};
+
+void sbox128(void * w, uint8_t box){
+	/* Runs forward S-box (box & 7) over the 16 bytes at w, in place. */
+	uint8_t tmp[16]; /* separate result buffer, copied back once all four passes are done */
+	uint8_t *state = (uint8_t*)w;
+	uint8_t col;
+	sb_fpt apply = (sb_fpt)pgm_read_word(&(sf_tab[box & 0x7])); /* handler address is stored in flash */
+	for(col=0; col<4; ++col){
+		apply(tmp+col, state+col); /* each call handles bytes col, col+4, col+8, col+12 */
+	}
+	memcpy(w, tmp, 16);
+}
+
+void inv_sbox128(void * w, uint8_t box){
+	/* Runs inverse S-box (box & 7) over the 16 bytes at w, in place. */
+	uint8_t result[16]; /* separate result buffer, copied back once all four passes are done */
+	uint8_t *block = (uint8_t*)w;
+	uint8_t pass = 0;
+	sb_fpt undo = (sb_fpt)pgm_read_word(&(sinvf_tab[box & 0x7])); /* handler address is stored in flash */
+	while(pass<4){
+		undo(result+pass, block+pass); /* each call handles bytes pass, pass+4, pass+8, pass+12 */
+		++pass; }
+	memcpy(w, result, 16);
+}
+
+
+
+
+
+
+
+
diff --git a/serpent/serpent-sboxes-fast.S b/serpent/serpent-sboxes-fast.S
new file mode 100644
index 0000000..9242272
--- /dev/null
+++ b/serpent/serpent-sboxes-fast.S
@@ -0,0 +1,233 @@
+/* serpent-sboxes-fast.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* 
+ * File:        serpent-sboxes-fast.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: Implementation of the serpent sbox function.
+ * 
+ */
+ 
+#include <avr/io.h>
+#include "avr-asm-macros.S"
+
+
+serpent_sbox_fast: /* S0..S7, 16 bytes per box (two .byte rows each); every entry repeats its 4-bit value in both nibbles */
+ .byte 0x33, 0x88, 0xFF, 0x11, 0xAA, 0x66, 0x55, 0xBB 
+ .byte 0xEE, 0xDD, 0x44, 0x22, 0x77, 0x00, 0x99, 0xCC
+ .byte 0xFF, 0xCC, 0x22, 0x77, 0x99, 0x00, 0x55, 0xAA
+ .byte 0x11, 0xBB, 0xEE, 0x88, 0x66, 0xDD, 0x33, 0x44 
+ .byte 0x88, 0x66, 0x77, 0x99, 0x33, 0xCC, 0xAA, 0xFF 
+ .byte 0xDD, 0x11, 0xEE, 0x44, 0x00, 0xBB, 0x55, 0x22
+ .byte 0x00, 0xFF, 0xBB, 0x88, 0xCC, 0x99, 0x66, 0x33 
+ .byte 0xDD, 0x11, 0x22, 0x44, 0xAA, 0x77, 0x55, 0xEE
+ .byte 0x11, 0xFF, 0x88, 0x33, 0xCC, 0x00, 0xBB, 0x66 
+ .byte 0x22, 0x55, 0x44, 0xAA, 0x99, 0xEE, 0x77, 0xDD
+ .byte 0xFF, 0x55, 0x22, 0xBB, 0x44, 0xAA, 0x99, 0xCC 
+ .byte 0x00, 0x33, 0xEE, 0x88, 0xDD, 0x66, 0x77, 0x11
+ .byte 0x77, 0x22, 0xCC, 0x55, 0x88, 0x44, 0x66, 0xBB 
+ .byte 0xEE, 0x99, 0x11, 0xFF, 0xDD, 0x33, 0xAA, 0x00
+ .byte 0x11, 0xDD, 0xFF, 0x00, 0xEE, 0x88, 0x22, 0xBB 
+ .byte 0x77, 0x44, 0xCC, 0xAA, 0x99, 0x33, 0x55, 0x66
+
+serpent_sbox_inv_fast: /* inverse boxes, same layout; must stay directly behind serpent_sbox_fast because they are addressed as boxes 8..15 relative to that label */
+ .byte 0xDD, 0x33, 0xBB, 0x00, 0xAA, 0x66, 0x55, 0xCC 
+ .byte 0x11, 0xEE, 0x44, 0x77, 0xFF, 0x99, 0x88, 0x22
+ .byte 0x55, 0x88, 0x22, 0xEE, 0xFF, 0x66, 0xCC, 0x33 
+ .byte 0xBB, 0x44, 0x77, 0x99, 0x11, 0xDD, 0xAA, 0x00
+ .byte 0xCC, 0x99, 0xFF, 0x44, 0xBB, 0xEE, 0x11, 0x22 
+ .byte 0x00, 0x33, 0x66, 0xDD, 0x55, 0x88, 0xAA, 0x77
+ .byte 0x00, 0x99, 0xAA, 0x77, 0xBB, 0xEE, 0x66, 0xDD 
+ .byte 0x33, 0x55, 0xCC, 0x22, 0x44, 0x88, 0xFF, 0x11
+ .byte 0x55, 0x00, 0x88, 0x33, 0xAA, 0x99, 0x77, 0xEE 
+ .byte 0x22, 0xCC, 0xBB, 0x66, 0x44, 0xFF, 0xDD, 0x11
+ .byte 0x88, 0xFF, 0x22, 0x99, 0x44, 0x11, 0xDD, 0xEE 
+ .byte 0xBB, 0x66, 0x55, 0x33, 0x77, 0xCC, 0xAA, 0x00
+ .byte 0xFF, 0xAA, 0x11, 0xDD, 0x55, 0x33, 0x66, 0x00 
+ .byte 0x44, 0x99, 0xEE, 0x77, 0x22, 0xCC, 0x88, 0xBB
+ .byte 0x33, 0x00, 0x66, 0xDD, 0x99, 0xEE, 0xFF, 0x88 
+ .byte 0x55, 0xCC, 0xBB, 0x77, 0xAA, 0x11, 0x44, 0x22
+                  
+ 
+/*
+ * void serpent_ip(uint32_t *i, uint8_t *o){
+ */
+/* 
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_ip
+serpent_ip:
+	push_range 2, 17	
+	movw r26, r24 /* X = i */
+	ldi r24, 16
+	clr r31
+	ldi r30, 2 /* Z = data address 2; the st below relies on the register file being mapped at data addresses 0..31 */
+1:
+	ld r25, X+
+	st Z+, r25
+	dec r24
+	brne 1b
+	/* now the whole input is loaded in r2-r17 */
+	movw r26, r22 /* X = o */
+	ldi r21, 4 /* four passes, one per byte of the input words */
+4:
+	ldi r20, 8	
+2:	
+	lsr r2 /* shift one bit out of the current byte of each word (r2, r6, r10, r14) into r19 */
+	ror r19
+	lsr r6
+	ror r19
+	lsr r10
+	ror r19
+	lsr r14
+	ror r19
+	sbrc r20, 0 /* r19 holds eight fresh bits after every second round (odd r20): store it then */
+	st X+, r19
+	dec r20
+	brne 2b
+	
+	ldi r20, 15
+	ldi r30, 2 /* r31 is still 0, so Z = data address 2 again */
+3:
+	ldd r19, Z+1 /* move r3..r17 down by one register so the next byte of every word sits in r2, r6, r10, r14 */
+	st Z+, r19
+	dec r20
+	brne 3b
+	
+	dec r21
+	brne 4b
+	pop_range 2, 17 
+ 	ret
+
+/*
+ * void serpent_fp(uint32_t *i, uint8_t *o){
+ */
+/* 
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_fp
+serpent_fp:
+	movw r26, r24 /* X = i (input) */
+	movw r30, r22 /* Z = o (output) */
+	ldi r18, 4 /* four passes; each consumes four input bytes and emits one byte at four output offsets */
+1:	
+	ldi r19, 8 
+2:
+	sbrs r19, 0 /* load a new input byte on even counts only: one byte feeds two rounds */
+	ld r24, X+
+3:
+	lsr r24 /* deal the next four input bits, one each, into r20, r21, r22, r23 */
+	ror r20
+	lsr r24
+	ror r21
+	lsr r24
+	ror r22
+	lsr r24
+	ror r23
+	dec r19
+	brne 2b
+	
+	st Z+, r20 /* pass k writes offset k here ... */
+	std Z+3, r21 /* ... and, Z being already advanced, offsets k+4, k+8 and k+12 */
+	std Z+7, r22
+	std Z+11, r23
+	
+	dec r18
+	brne 1b 	
+	ret
+	
+
+/*
+ * void inv_sbox128(void * w, uint8_t box)
+ */
+.global inv_sbox128 
+inv_sbox128:
+ 	andi r22, 0x07 /* box &= 7 */
+ 	ori  r22, 0x08 /* boxes 8..15 select the inverse tables */
+ 	rjmp sbox128x_fast
+ 
+/*	
+ * void sbox128(void * w, uint8_t box);
+ */
+/*
+ * param w   is passed in r24:r25
+ * param box is passed in r22
+ */
+.global sbox128 
+sbox128:
+ 	andi r22, 0x07 /* box &= 7, then fall through into the common code */
+ 
+sbox128x_fast: /* common part: r22 = table number 0..15, r24:r25 = w */
+	stack_alloc 16 /* macro from avr-asm-macros.S (not visible here); presumably reserves 16 stack bytes and leaves Z just below them */
+	adiw r30, 1
+	push_ r24, r25, r22, r30, r31 /* keep w, box and the buffer address across the call; presumably pushed in the listed order */
+	movw r22, r30	/* Z points to the stack buffer */
+	rcall serpent_ip /* serpent_ip(w, buffer) */
+	pop_ r27, r26, r22 /* X = buffer address (saved Z), r22 = box again */
+	ldi r25, hi8(serpent_sbox_fast)
+	ldi r24, lo8(serpent_sbox_fast)
+	swap r22 /* r22 *= 16 */
+	add r24, r22
+	adc r25, r1 /* relies on r1 == 0 */
+	/* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
+	ldi r22, 16 /* 16 buffer bytes, two 4-bit lookups each */
+1:	
+	movw r30, r24
+	ld r18, X
+	mov r20, r18
+	andi r18, 0x0f /* low nibble indexes the table */
+	add r30, r18
+	adc r31, r1
+	lpm r19, Z
+2:	
+	swap r20
+	andi r20, 0x0f /* high nibble indexes the table */
+	movw r30, r24
+	add r30, r20
+	adc r31, r1
+	lpm r21, Z
+3:	
+	andi r19, 0x0F /* table entries carry the result in both nibbles: keep the low one for the low lookup ... */
+	andi r21, 0xF0 /* ... and the high one for the high lookup */
+	or r19, r21
+	st X+, r19
+	dec r22
+	brne 1b
+	
+	pop_ r23, r22 /* r22:r23 = w, saved from r24:r25 above */
+	movw r24, r26
+	sbiw r24, 16 /* X ran 16 bytes past the buffer start; r24:r25 = buffer */
+
+	rcall serpent_fp /* serpent_fp(buffer, w) */
+	
+	stack_free 16	
+	ret
+
+
+
+
+
+
+ 
+
+
diff --git a/serpent/serpent-sboxes-small.S b/serpent/serpent-sboxes-small.S
new file mode 100644
index 0000000..028fd77
--- /dev/null
+++ b/serpent/serpent-sboxes-small.S
@@ -0,0 +1,231 @@
+/* serpent-sboxes-small.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* 
+ * File:        serpent-sboxes-small.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: Implementation of the serpent sbox function.
+ * 
+ */
+ 
+#include <avr/io.h>
+#include "avr-asm-macros.S"
+
+
+serpent_sbox: /* S0..S7, 8 bytes per box; each byte packs two entries: low nibble = even index, high nibble = odd index */
+ .byte 0x83, 0x1F, 0x6A, 0xB5, 0xDE, 0x24, 0x07, 0xC9
+ .byte 0xCF, 0x72, 0x09, 0xA5, 0xB1, 0x8E, 0xD6, 0x43 
+ .byte 0x68, 0x97, 0xC3, 0xFA, 0x1D, 0x4E, 0xB0, 0x25
+ .byte 0xF0, 0x8B, 0x9C, 0x36, 0x1D, 0x42, 0x7A, 0xE5
+ .byte 0xF1, 0x38, 0x0C, 0x6B, 0x52, 0xA4, 0xE9, 0xD7
+ .byte 0x5F, 0xB2, 0xA4, 0xC9, 0x30, 0x8E, 0x6D, 0x17
+ .byte 0x27, 0x5C, 0x48, 0xB6, 0x9E, 0xF1, 0x3D, 0x0A
+ .byte 0xD1, 0x0F, 0x8E, 0xB2, 0x47, 0xAC, 0x39, 0x65
+
+serpent_sbox_inv: /* inverse boxes, same packing; must stay directly behind serpent_sbox because they are addressed as boxes 8..15 relative to that label */
+ .byte 0x3D, 0x0B, 0x6A, 0xC5, 0xE1, 0x74, 0x9F, 0x28
+ .byte 0x85, 0xE2, 0x6F, 0x3C, 0x4B, 0x97, 0xD1, 0x0A
+ .byte 0x9C, 0x4F, 0xEB, 0x21, 0x30, 0xD6, 0x85, 0x7A
+ .byte 0x90, 0x7A, 0xEB, 0xD6, 0x53, 0x2C, 0x84, 0x1F
+ .byte 0x05, 0x38, 0x9A, 0xE7, 0xC2, 0x6B, 0xF4, 0x1D
+ .byte 0xF8, 0x92, 0x14, 0xED, 0x6B, 0x35, 0xC7, 0x0A
+ .byte 0xAF, 0xD1, 0x35, 0x06, 0x94, 0x7E, 0xC2, 0xB8
+ .byte 0x03, 0xD6, 0xE9, 0x8F, 0xC5, 0x7B, 0x1A, 0x24                  
+ 
+/*
+ * void serpent_ip(uint32_t *i, uint8_t *o){
+ */
+/* 
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_ip
+serpent_ip:
+	push_range 2, 17	
+	movw r26, r24 /* X = i */
+	ldi r24, 16
+	clr r31
+	ldi r30, 2 /* Z = data address 2; the st below relies on the register file being mapped at data addresses 0..31 */
+1:
+	ld r25, X+
+	st Z+, r25
+	dec r24
+	brne 1b
+	/* now the whole input is loaded in r2-r17 */
+	movw r26, r22 /* X = o */
+	ldi r21, 4 /* four passes, one per byte of the input words */
+4:
+	ldi r20, 8	
+2:	
+	lsr r2 /* shift one bit out of the current byte of each word (r2, r6, r10, r14) into r19 */
+	ror r19
+	lsr r6
+	ror r19
+	lsr r10
+	ror r19
+	lsr r14
+	ror r19
+	sbrc r20, 0 /* r19 holds eight fresh bits after every second round (odd r20): store it then */
+	st X+, r19
+	dec r20
+	brne 2b
+	
+	ldi r20, 15
+	ldi r30, 2 /* r31 is still 0, so Z = data address 2 again */
+3:
+	ldd r19, Z+1 /* move r3..r17 down by one register so the next byte of every word sits in r2, r6, r10, r14 */
+	st Z+, r19
+	dec r20
+	brne 3b
+	
+	dec r21
+	brne 4b
+	pop_range 2, 17 
+ 	ret
+
+/*
+ * void serpent_fp(uint32_t *i, uint8_t *o){
+ */
+/* 
+ * param i is given in r24:r25
+ * param o is given in r22:r23
+ */
+.global serpent_fp
+serpent_fp:
+	movw r26, r24 /* X = i (input) */
+	movw r30, r22 /* Z = o (output) */
+	ldi r18, 4 /* four passes; each consumes four input bytes and emits one byte at four output offsets */
+1:	
+	ldi r19, 8 
+2:
+	sbrs r19, 0 /* load a new input byte on even counts only: one byte feeds two rounds */
+	ld r24, X+
+3:
+	lsr r24 /* deal the next four input bits, one each, into r20, r21, r22, r23 */
+	ror r20
+	lsr r24
+	ror r21
+	lsr r24
+	ror r22
+	lsr r24
+	ror r23
+	dec r19
+	brne 2b
+	
+	st Z+, r20 /* pass k writes offset k here ... */
+	std Z+3, r21 /* ... and, Z being already advanced, offsets k+4, k+8 and k+12 */
+	std Z+7, r22
+	std Z+11, r23
+	
+	dec r18
+	brne 1b 
+	
+	ret
+/*
+ * void inv_sbox128(void * w, uint8_t box)
+ */
+.global inv_sbox128 
+inv_sbox128:
+ 	andi r22, 0x07 /* box &= 7 */
+ 	ori  r22, 0x08 /* boxes 8..15 select the inverse tables */
+ 	rjmp sbox128x
+ 
+/*	
+ * void sbox128(void * w, uint8_t box);
+ */
+/*
+ * param w   is passed in r24:r25
+ * param box is passed in r22
+ */
+.global sbox128 
+sbox128:
+ 	andi r22, 0x07 /* box &= 7, then fall through into sbox128x */
+
+/*	
+ * void sbox128x(void * w, uint8_t box);
+ */
+/*
+ * param w   is passed in r24:r25
+ * param box is passed in r22
+ */
+ 
+.global sbox128x
+sbox128x: /* box is used unmasked here: 0..7 forward, 8..15 inverse */
+	stack_alloc 16 /* macro from avr-asm-macros.S (not visible here); presumably reserves 16 stack bytes and leaves Z just below them */
+	adiw r30, 1
+	push_ r24, r25, r22, r30, r31 /* keep w, box and the buffer address across the call; presumably pushed in the listed order */
+	movw r22, r30	/* Z points to the stack buffer */
+	rcall serpent_ip /* serpent_ip(w, buffer) */
+	pop_ r27, r26, r22 /* X = buffer address (saved Z), r22 = box again */
+	ldi r25, hi8(serpent_sbox)
+	ldi r24, lo8(serpent_sbox)
+	swap r22
+	lsr  r22 /* r22 *= 8 */
+	add r24, r22
+	adc r25, r1 /* relies on r1 == 0 */
+	/* now we have X pointing to the buffer and (r24:r25) pointing to the SBox */
+	ldi r22, 16 /* 16 buffer bytes, two 4-bit lookups each */
+1:	
+	movw r30, r24
+	ld r18, X
+	mov r20, r18
+	andi r18, 0x0f /* low nibble = table index */
+	bst r18, 0 /* T = index is odd */
+	lsr r18 /* two entries per table byte */
+	add r30, r18
+	adc r31, r1
+	lpm r19, Z
+	brtc 2f /* even index: entry already sits in the low nibble */
+	swap r19
+2:	
+	swap r20
+	andi r20, 0x0f /* high nibble = table index */
+	bst r20, 0
+	lsr r20
+	movw r30, r24
+	add r30, r20
+	adc r31, r1
+	lpm r21, Z
+	brts 3f /* odd index: entry already sits in the high nibble */
+	swap r21
+3:	
+	andi r19, 0x0F
+	andi r21, 0xF0
+	or r19, r21
+	st X+, r19
+	dec r22
+	brne 1b
+	
+	pop_ r23, r22 /* r22:r23 = w, saved from r24:r25 above */
+	movw r24, r26
+	sbiw r24, 16 /* X ran 16 bytes past the buffer start; r24:r25 = buffer */
+
+	rcall serpent_fp /* serpent_fp(buffer, w) */
+	
+	stack_free 16	
+	ret
+ 
+
+
+
+ 
+
+
diff --git a/serpent/serpent-sboxes.h b/serpent/serpent-sboxes.h
new file mode 100644
index 0000000..c391edc
--- /dev/null
+++ b/serpent/serpent-sboxes.h
@@ -0,0 +1,28 @@
+/* serpent-sboxes.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef SERPENTSBOXES_H_
+#define SERPENTSBOXES_H_
+
+#include <stdint.h>
+
+void sbox128(void * w, uint8_t box);     /* apply S-box (box & 7) in place to the 16 bytes at w */
+void inv_sbox128(void * w, uint8_t box); /* apply the inverse of S-box (box & 7) in place to the 16 bytes at w */
+
+
+#endif /*SERPENTSBOXES_H_*/
diff --git a/serpent/serpent-sboxes_c.c b/serpent/serpent-sboxes_c.c
new file mode 100644
index 0000000..3a59969
--- /dev/null
+++ b/serpent/serpent-sboxes_c.c
@@ -0,0 +1,128 @@
+/* serpent-sboxes_c.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* serpent-sboxes_c.c
+ * a non-bitsliced implementation of the serpent sboxes
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include <stdint.h>
+#include <string.h> /* memset() */
+#include <avr/pgmspace.h>
+#include "serpent-sboxes.h"
+
+const uint8_t sbox[] PROGMEM = { /* S0..S7, 8 bytes each, two entries per byte: high nibble = even index, low nibble = odd index; const is required for PROGMEM by current avr-gcc */
+ 0x38, 0xF1, 0xA6, 0x5B, 0xED, 0x42, 0x70, 0x9C,
+ 0xFC, 0x27, 0x90, 0x5A, 0x1B, 0xE8, 0x6D, 0x34, 
+ 0x86, 0x79, 0x3C, 0xAF, 0xD1, 0xE4, 0x0B, 0x52,
+ 0x0F, 0xB8, 0xC9, 0x63, 0xD1, 0x24, 0xA7, 0x5E,
+ 0x1F, 0x83, 0xC0, 0xB6, 0x25, 0x4A, 0x9E, 0x7D,
+ 0xF5, 0x2B, 0x4A, 0x9C, 0x03, 0xE8, 0xD6, 0x71,
+ 0x72, 0xC5, 0x84, 0x6B, 0xE9, 0x1F, 0xD3, 0xA0,
+ 0x1D, 0xF0, 0xE8, 0x2B, 0x74, 0xCA, 0x93, 0x56,
+/* now the inverted sboxes (selected as boxes 8..15) */
+ 0xD3, 0xB0, 0xA6, 0x5C, 0x1E, 0x47, 0xF9, 0x82,
+ 0x58, 0x2E, 0xF6, 0xC3, 0xB4, 0x79, 0x1D, 0xA0,
+ 0xC9, 0xF4, 0xBE, 0x12, 0x03, 0x6D, 0x58, 0xA7,
+ 0x09, 0xA7, 0xBE, 0x6D, 0x35, 0xC2, 0x48, 0xF1,
+ 0x50, 0x83, 0xA9, 0x7E, 0x2C, 0xB6, 0x4F, 0xD1,
+ 0x8F, 0x29, 0x41, 0xDE, 0xB6, 0x53, 0x7C, 0xA0,
+ 0xFA, 0x1D, 0x53, 0x60, 0x49, 0xE7, 0x2C, 0x8B,
+ 0x30, 0x6D, 0x9E, 0xF8, 0x5C, 0xB7, 0xA1, 0x42
+};
+         
+
+#define SHR_O(a) c=(a)&1; ((a) = (a)>>1)
+#define SHR_I(a) ((a) = (c?0x80:0x00)| ((a)>>1))
+
+/*
+ * Bit-interleave the four 32-bit words at i into the 16 bytes at o:
+ * nibble k of the output holds bit k of i[0], i[1], i[2], i[3]
+ * (i[0] in the nibble's lowest bit). The words at i are shifted out
+ * completely, i.e. they are left zeroed.
+ */
+static void serpent_ip(uint32_t *i, uint8_t *o){
+	uint8_t c; /* carry bit handed from SHR_O to SHR_I */
+	uint8_t nib, word;
+	memset(o, 0, 16);
+	for(nib=0; nib<32; ++nib){ /* two nibbles per output byte, low nibble first */
+		for(word=0; word<4; ++word){
+			SHR_O(i[word]);
+			SHR_I(o[nib/2]);
+		}
+	}
+}
+
+#undef SHR_I
+#define SHR_I(a) ((a) = (c?0x80000000L:0x00L)| ((a)>>1)) /* we use 32-bit words here */
+
+static void serpent_fp(uint32_t *i, uint32_t *o){
+	/* Undo the interleaving: bit k of o[j] is taken from bit j of
+	 * nibble k of the 128-bit input at i (nibbles counted from the
+	 * low end of i[0]); the words at i are left zeroed. */
+	uint8_t c; /* carry bit handed from SHR_O to SHR_I */
+	uint8_t src, step, dst;
+	memset(o, 0, 16);
+	for(src=0; src<4; ++src){
+		/* each 32-bit input word carries eight nibbles */
+		for(step=0; step<8; ++step){
+			for(dst=0; dst<4; ++dst){
+				SHR_O(i[src]);
+				SHR_I(o[dst]);
+			}
+		}
+	}
+}
+
+/******************************************************************************/
+static void sbox128x(uint8_t box, void* w){ /* apply table box (0..7 forward, 8..15 inverse) in place to the 16 bytes at w */
+	uint8_t sb[16];
+	uint8_t i,t,x;
+	box &= 0x0f;
+	/* load sbox: unpack the eight packed flash bytes into sixteen 4-bit entries */
+	for(i=0; i<8; ++i){
+		t = pgm_read_byte(sbox + box*8 + i);
+		sb[2*i+0]=t>>4;
+		sb[2*i+1]=t&0xf;
+	}
+	uint32_t o[4]; /* declared as words so serpent_fp() reads a properly aligned object of the right type */
+	uint8_t *ob = (uint8_t*)o; /* byte view used for the nibble substitution */
+	serpent_ip(w,ob);
+	for(i=0; i<16; ++i){
+		t = ob[i];
+		x = sb[t>>4];
+		x <<= 4;
+		x |= sb[t&0xf];
+		ob[i] = x;
+	}
+	serpent_fp(o, w);
+}
+
+void sbox128(void * w, uint8_t box){
+	sbox128x((uint8_t)(box % 8), w); /* tables 0..7 are the forward S-boxes */
+}
+
+
+void inv_sbox128(void * w, uint8_t box){
+	sbox128x((uint8_t)(8 + (box % 8)), w); /* tables 8..15 are the inverse S-boxes */
+}
+
+
+
diff --git a/serpent/serpent.c b/serpent/serpent.c
new file mode 100644
index 0000000..40ccecd
--- /dev/null
+++ b/serpent/serpent.c
@@ -0,0 +1,140 @@
+/* serpent.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* serpent.c
+ * a bitsliced implementation of the serpent cipher for avr microcontrollers
+ * author: Daniel Otte 
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include <stdint.h>
+#include <string.h> /* memset() */
+#include <avr/pgmspace.h>
+#include "memxor.h"
+#include "serpent.h"
+#include "serpent-sboxes.h"
+
+/******************************************************************************/
+/* Rotate a left by n bits. n must be 1..31: n==0 would shift by 32, which C leaves undefined. */
+uint32_t rotl32(uint32_t a, uint8_t n){
+	return ((a<<n) | (a>>(32-n)));
+}
+
+/* Rotate a right by n bits. n must be 1..31: n==0 would shift by 32, which C leaves undefined. */
+uint32_t rotr32(uint32_t a, uint8_t n){
+	return ((a>>n) | (a<<(32-n)));
+}
+
+/* X0..X3: the 16-byte block `b` of the enclosing function, viewed as four 32-bit words. */
+#define X0 (((uint32_t*)b)[0])
+#define X1 (((uint32_t*)b)[1])
+#define X2 (((uint32_t*)b)[2])
+#define X3 (((uint32_t*)b)[3])
+/* Serpent linear transformation, applied in place to the 16-byte block b. */
+static void serpent_lt(uint8_t *b){
+	X0 = rotl32(X0, 13);
+	X2 = rotl32(X2,  3);
+	X1 ^= X0 ^ X2;
+	X3 ^= X2 ^ (X0 << 3);
+	X1 = rotl32(X1, 1);
+	X3 = rotl32(X3, 7);
+	X0 ^= X1 ^ X3;
+	X2 ^= X3 ^ (X1 << 7);
+	X0 = rotl32(X0, 5);
+	X2 = rotr32(X2, 10); /* same as a left rotation by 22 */
+}
+/* Inverse of serpent_lt(): undoes its steps in reverse order, in place on the 16-byte block b. */
+static void serpent_inv_lt(uint8_t *b){
+	X2 = rotl32(X2, 10);
+	X0 = rotr32(X0, 5);
+	X2 ^= X3 ^ (X1 << 7);
+	X0 ^= X1 ^ X3;
+	X3 = rotr32(X3, 7);
+	X1 = rotr32(X1, 1);
+	X3 ^= X2 ^ (X0 << 3);
+	X1 ^= X0 ^ X2;
+	X2 = rotr32(X2,  3);
+	X0 = rotr32(X0, 13);
+}
+/* Constant XORed into every prekey word by serpent_gen_w(). */
+#define GOLDEN_RATIO 0x9e3779b9l
+/* Next prekey word: XOR of window words b[0], b[3], b[5], b[7], GOLDEN_RATIO and the index i, rotated left by 11. */
+static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
+	uint32_t ret;
+	ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
+	ret = rotl32(ret, 11);
+	return ret;
+} 
+/* Fill ctx with the 33 round keys derived from a key of keysize_b bits; keys shorter than 256 bits are padded here. */
+void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx){
+	uint32_t buffer[8]; /* sliding window holding the 8 most recent prekey words */
+	uint8_t i,j;
+	if(keysize_b<256){
+		/* keysize is less than 256 bit, padding needed */
+		memset(buffer, 0, 32);
+		memcpy(buffer, key, (keysize_b+7)/8);
+		((uint8_t*)buffer)[keysize_b/8] |= 1<<(keysize_b%8); /* set one 1 bit right after the key; NOTE(review): key bits above it in a partial last byte are not cleared */
+	} else {
+		/* keysize is 256 bit (or more; only the first 32 bytes are used) */
+		memcpy(buffer, key, 32); 
+	}
+	for(i=0; i<33; ++i){
+		for(j=0; j<4; ++j){
+			ctx->k[i][j] = serpent_gen_w(buffer, i*4+j);
+			memmove(buffer, &(buffer[1]), 7*4); /* shift buffer one to the "left" */
+			buffer[7] = ctx->k[i][j];
+		}
+	}
+	for(i=0; i<33; ++i){
+		sbox128(ctx->k[i],3-i); /* S-box index counts down from 3; sbox128() reduces it mod 8 */
+	}
+}
+/* Encrypt the 16-byte block at buffer in place with the round keys in ctx (memxor() presumably XORs 16 key bytes into buffer). */
+void serpent_enc(void* buffer, const serpent_ctx_t* ctx){
+	uint8_t i;
+	for(i=0; i<31; ++i){ /* rounds 0..30: key mixing, S-box, linear transformation */
+		memxor(buffer, ctx->k[i], 16);
+		sbox128(buffer, i);
+		serpent_lt((uint8_t*)buffer);
+	}
+	memxor(buffer, ctx->k[i], 16); /* last round (i==31): no linear transformation ... */
+	sbox128(buffer, i);
+	++i;
+	memxor(buffer, ctx->k[i], 16); /* ... a second key mixing with k[32] takes its place */
+}
+/* Decrypt the 16-byte block at buffer in place: the steps of serpent_enc() undone in reverse order. */
+void serpent_dec(void* buffer, const serpent_ctx_t* ctx){
+	int8_t i=32; /* signed so that the i>=0 loop test below can become false */
+	/* undo the last round: k[32], inverse S-box 31, k[31] */
+	memxor(buffer, ctx->k[i], 16);
+	--i;
+	inv_sbox128(buffer, i);
+	memxor((uint8_t*)buffer, ctx->k[i], 16);
+	--i;
+	for(; i>=0; --i){ /* rounds 30..0: inverse linear transformation, inverse S-box, key mixing */
+		serpent_inv_lt(buffer);
+		inv_sbox128(buffer, i);
+		memxor(buffer, ctx->k[i], 16);
+	}
+}
+
+
+
+
+
diff --git a/serpent/serpent.h b/serpent/serpent.h
new file mode 100644
index 0000000..84e4a87
--- /dev/null
+++ b/serpent/serpent.h
@@ -0,0 +1,47 @@
+/* serpent.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** \file   serpent.h
+ * \author  Daniel Otte
+ * \license GPLv3
+ * \brief an implementation of the serpent cipher for avr microcontrollers
+ */
+
+#ifndef SERPENT_H_
+#define SERPENT_H_
+
+#include <stdint.h>
+/* One round key: 128 bits stored as four 32-bit words. */
+typedef uint32_t serpent_subkey_t[4];
+/* Cipher context: the 33 round keys, filled in by serpent_init(). */
+typedef struct serpent_ctx_st {
+	serpent_subkey_t k[33];
+}  serpent_ctx_t;
+/* Key lengths in bits; presumably intended as values for the keysize_b parameter of serpent_init(). */
+#define SERPENT_KEY128 128
+#define SERPENT_KEY192 192
+#define SERPENT_KEY256 256
+
+/* serpent_init(): key holds keysize_b bits; keys shorter than 256 bits are padded internally, longer ones use only 32 bytes. */
+/* serpent_enc()/serpent_dec(): transform the 16-byte block at buffer in place using the round keys in ctx. */
+void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx);
+void serpent_enc(void* buffer, const serpent_ctx_t* ctx);
+void serpent_dec(void* buffer, const serpent_ctx_t* ctx);
+
+
+#endif /*SERPENT_H_*/
diff --git a/sha1-asm.S b/sha1-asm.S
deleted file mode 100644
index f571685..0000000
--- a/sha1-asm.S
+++ /dev/null
@@ -1,886 +0,0 @@
-/* sha1-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * Author:	Daniel Otte
- *
- * License: GPLv3 or later
-*/
-; SHA1 implementation in assembler for AVR
-SHA1_BLOCK_BITS = 512
-SHA1_HASH_BITS = 160
-
-.macro precall
-	/* push r18 - r27, r30 - r31*/
-	push r0
-	push r1
-	push r18
-	push r19
-	push r20
-	push r21
-	push r22
-	push r23
-	push r24
-	push r25
-	push r26
-	push r27
-	push r30
-	push r31
-	clr r1
-.endm
-
-.macro postcall
-	pop r31
-	pop r30
-	pop r27
-	pop r26
-	pop r25
-	pop r24
-	pop r23
-	pop r22
-	pop r21
-	pop r20
-	pop r19
-	pop r18
-	pop r1
-	pop r0
-.endm
-
-
-.macro hexdump length
-	push r27
-	push r26
-	ldi r25, '\r'
-	mov r24, r25
-	call uart_putc
-	ldi r25, '\n'
-	mov r24, r25
-	call uart_putc
-	pop r26
-	pop r27
-	movw r24, r26
-.if \length > 16
-	ldi r22, lo8(16)
-	ldi r23, hi8(16)
-	push r27
-	push r26
-	call uart_hexdump
-	pop r26
-	pop r27
-	adiw r26, 16
-	hexdump \length-16
-.else
-	ldi r22, lo8(\length)
-	ldi r23, hi8(\length)
-	call uart_hexdump
-.endif
-.endm
-
-.macro delay
-/*	
-	push r0
-	push r1
-	clr r0
-1:	clr r1
-2:	dec r1
-	brne 2b
-	dec r0
-	brne 1b
-	pop r1
-	pop r0  // */
-.endm
-
-/* X points to Block */
-.macro dbg_hexdump length
-/*	
-	precall
-	hexdump \length
-	postcall
-	// */
-.endm
-
-
-
-.section .text
-
-SPL = 0x3D
-SPH = 0x3E
-SREG = 0x3F
-
-
-;
-;sha1_ctx_t is:
-;
-; [h0][h1][h2][h3][h4][length]
-; hn is 32 bit large, length is 64 bit large
-
-;###########################################################	
-
-.global sha1_ctx2hash
-; === sha1_ctx2hash ===
-; this function converts a state into a normal hash (bytestring)
-;  param1: the 16-bit destination pointer
-;	given in r25,r24 (r25 is most significant)
-;  param2: the 16-bit pointer to sha1_ctx structure
-;	given in r23,r22
-sha1_ctx2hash:
-	movw r26, r22
-	movw r30, r24
-	ldi r21, 5
-	sbiw r26, 4
-1:	
-	ldi r20, 4
-	adiw r26, 8
-2:	
-		ld r0, -X
-		st Z+, r0	
-	dec r20
-	brne 2b
-	
-	dec r21
-	brne 1b
-	
-	ret
-
-;###########################################################	
-
-.global sha1
-; === sha1 ===
-; this function calculates SHA-1 hashes from messages in RAM
-;  param1: the 16-bit hash destination pointer
-;	given in r25,r24 (r25 is most significant)
-;  param2: the 16-bit pointer to message
-;	given in r23,r22
-;  param3: 32-bit length value (length of message in bits)
-;   given in r21,r20,r19,r18
-sha1:
-sha1_prolog:
-	push r8
-	push r9
-	push r10
-	push r11
-	push r12
-	push r13
-	push r16
-	push r17
-	in r16, SPL
-	in r17, SPH
-	subi r16, 5*4+8 
-	sbci r17, 0	
-	in r0, SREG
-	cli
-	out SPL, r16
-	out SPH, r17
-	out SREG, r0
-	
-	push r25
-	push r24
-	inc r16
-	adc r17, r1
-	
-	movw r8, r18		/* backup of length*/
-	movw r10, r20
-	
-	movw r12, r22	/* backup pf msg-ptr */
-	
-	movw r24, r16
-	rcall sha1_init
-	/* if length >= 512 */
-1:
-	tst r11
-	brne 4f
-	tst r10
-	brne 4f
-	mov r19, r9
-	cpi r19, 0x02
-	brlo 4f
-	
-	movw r24, r16
-	movw r22, r12
-	rcall sha1_nextBlock
-	ldi r19, 0x64
-	add r22, r19
-	adc r23, r1
-	/* length -= 512 */
-	ldi r19, 0x02
-	sub r9, r19
-	sbc r10, r1
-	sbc r11, r1
-	rjmp 1b
-	
-4:
-	movw r24, r16
-	movw r22, r12
-	movw r20, r8
-	rcall sha1_lastBlock
-	
-	pop r24
-	pop r25
-	movw r22, r16
-	rcall sha1_ctx2hash	
-	
-sha1_epilog:
-	in r30, SPL
-	in r31, SPH
-	adiw r30, 5*4+8 	
-	in r0, SREG
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG, r0
-	pop r17
-	pop r16
-	pop r13
-	pop r12
-	pop r11
-	pop r10
-	pop r9
-	pop r8
-	ret
-
-;###########################################################	
-
-
-; block MUST NOT be larger than 64 bytes
-
-.global sha1_lastBlock
-; === sha1_lastBlock ===
-; this function does padding & Co. for calculating SHA-1 hashes
-;  param1: the 16-bit pointer to sha1_ctx structure
-;	given in r25,r24 (r25 is most significant)
-;  param2: an 16-bit pointer to 64 byte block to hash
-;	given in r23,r22
-;  param3: an 16-bit integer specifing length of block in bits
-;	given in r21,r20
-sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
-
-
-sha1_lastBlock:
-	cpi r21, 0x02
-	brlo sha1_lastBlock_prolog
-	push r25
-	push r24
-	push r23
-	push r22
-	push r21
-	push r20
-	rcall sha1_nextBlock
-	pop r20
-	pop r21
-	pop r22
-	pop r23
-	pop r24
-	pop r25
-	subi r21, 2
-	subi r23, -2
-	rjmp sha1_lastBlock
-sha1_lastBlock_prolog:
-	/* allocate space on stack */
-	in r30, SPL
-	in r31, SPH
-	in r1, SREG
-	subi r30, lo8(64)
-	sbci r31, hi8(64) /* ??? */
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG,r1
-
-	adiw r30, 1 /* SP points to next free byte on stack */
-	mov r18, r20 /* r20 = LSB(length) */
-	lsr r18
-	lsr r18
-	lsr r18
-	bst r21, 0	/* may be we should explain this ... */
-	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
-	
-	
-	movw r26, r22 /* X points to begin of msg */
-	tst r18
-	breq sha1_lastBlock_post_copy
-	mov r1, r18
-sha1_lastBlock_copy_loop:
-	ld r0, X+
-	st Z+, r0
-	dec r1
-	brne sha1_lastBlock_copy_loop
-sha1_lastBlock_post_copy:	
-sha1_lastBlock_insert_stuffing_bit:	
-	ldi r19, 0x80
-	mov r0,r19 	
-	ldi r19, 0x07
-	and r19, r20 /* if we are in bitmode */
-	breq 2f	/* no bitmode */
-1:	
-	lsr r0
-	dec r19
-	brne 1b
-	ld r19, X
-/* maybe we should do some ANDing here, just for safety */
-	or r0, r19
-2:	
-	st Z+, r0
-	inc r18
-
-/* checking stuff here */
-	cpi r18, 64-8+1
-	brsh 0f 
-	rjmp sha1_lastBlock_insert_zeros
-0:
-	/* oh shit, we landed here */
-	/* first we have to fill it up with zeros */
-	ldi r19, 64
-	sub r19, r18
-	breq 2f
-1:	
-	st Z+, r1
-	dec r19
-	brne 1b	
-2:	
-	sbiw r30, 63
-	sbiw r30,  1
-	movw r22, r30
-	
-	push r31
-	push r30
-	push r25
-	push r24
-	push r21
-	push r20
-	rcall sha1_nextBlock
-	pop r20
-	pop r21
-	pop r24
-	pop r25
-	pop r30
-	pop r31
-	
-	/* now we should subtract 512 from length */
-	movw r26, r24
-	adiw r26, 4*5+1 /* we can skip the lowest byte */
-	ld r19, X
-	subi r19, hi8(512)
-	st X+, r19
-	ldi r18, 6
-1:
-	ld r19, X
-	sbci r19, 0
-	st X+, r19
-	dec r18
-	brne 1b
-	
-;	clr r18 /* not neccessary ;-) */
-	/* reset Z pointer to begin of block */
-
-sha1_lastBlock_insert_zeros:	
-	ldi r19, 64-8
-	sub r19, r18
-	breq sha1_lastBlock_insert_length
-	clr r1
-1:
-	st Z+, r1	/* r1 is still zero */
-	dec r19
-	brne 1b
-
-;	rjmp sha1_lastBlock_epilog
-sha1_lastBlock_insert_length:
-	movw r26, r24	/* X points to state */
-	adiw r26, 5*4	/* X points to (state.length) */
-	adiw r30, 8		/* Z points one after the last byte of block */
-	ld r0, X+
-	add r0, r20
-	st -Z, r0
-	ld r0, X+
-	adc r0, r21
-	st -Z, r0
-	ldi r19, 6
-1:
-	ld r0, X+
-	adc r0, r1
-	st -Z, r0
-	dec r19
-	brne 1b
-
-	sbiw r30, 64-8
-	movw r22, r30
-	rcall sha1_nextBlock
-
-sha1_lastBlock_epilog:
-	in r30, SPL
-	in r31, SPH
-	in r1, SREG
-	adiw r30, 63 ; lo8(64)
-	adiw r30,  1  ; hi8(64)
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG,r1
-	clr r1
-	clr r0
-	ret
-
-/**/
-;###########################################################	
-
-.global sha1_nextBlock
-; === sha1_nextBlock ===
-; this is the core function for calculating SHA-1 hashes
-;  param1: the 16-bit pointer to sha1_ctx structure
-;	given in r25,r24 (r25 is most significant)
-;  param2: an 16-bit pointer to 64 byte block to hash
-;	given in r23,r22
-sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array and 5 32-bit values for a array (total 84 byte)
-
-xtmp = 0
-xNULL = 1
-W1 = 10
-W2 = 11
-T1	= 12
-T2	= 13
-T3	= 14
-T4	= 15
-LoopC = 16
-S	  = 17
-tmp1 = 18
-tmp2 = 19
-tmp3 = 20
-tmp4 = 21
-F1 = 22
-F2 = 23
-F3 = 24
-F4 = 25
-
-/* byteorder: high number <--> high significance */
-sha1_nextBlock:
- ; initial, let's make some space ready for local vars
- 			 /* replace push & pop by mem ops? */
-	push r10
-	push r11
-	push r12
-	push r13
-	push r14
-	push r15
-	push r16
-	push r17
-	push r28
-	push r29
-	in r20, SPL
-	in r21, SPH
-	movw r18, r20			;backup SP
-;	movw r26, r20			; X points to free space on stack /* maybe removeable? */ 
-	movw r30, r22			; Z points to message
-	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
-	sbci r21, hi8(sha1_nextBlock_localSpace)
-	movw r26, r20			; X points to free space on stack 
-	in r0, SREG
-	cli ; we want to be uninterrupted while updating SP
-	out SPL, r20
-	out SPH, r21
-	out SREG, r0
-	
-	push r18
-	push r19 /* push old SP on new stack */
-	push r24
-	push r25 /* param1 will be needed later */
-	
-	/* load a[] with state */
-	movw 28, r24 /* load pointer to state in Y */
-	adiw r26, 1 ; X++
-
-	ldi LoopC, 5*4	
-1:	ld tmp1, Y+
-	st X+, tmp1
-	dec LoopC
-	brne 1b
-
-	movw W1, r26 /* save pointer to w[0] */
-	/* load w[] with endian fixed message */
-		/* we might also use the changeendian32() function at bottom */
-	movw r30, r22 /* mv param2 (ponter to msg) to Z */	
-	ldi LoopC, 16
-1:
-	ldd tmp1, Z+3
-	st X+, tmp1
-	ldd tmp1, Z+2
-	st X+, tmp1
-	ldd tmp1, Z+1
-	st X+, tmp1
-	ld tmp1, Z
-	st X+, tmp1
-	adiw r30, 4
-	dec LoopC
-	brne 1b
-	
-	;clr LoopC /* LoopC is named t in FIPS 180-2 */	
-	clr xtmp
-sha1_nextBlock_mainloop:
-	mov S, LoopC
-	lsl S
-	lsl S
-	andi S, 0x3C /* S is a bytepointer so *4 */
-	/* load w[s] */
-	movw r26, W1
-	add r26, S /* X points at w[s] */
-	adc r27, xNULL
-	ld T1, X+
-	ld T2, X+
-	ld T3, X+
-	ld T4, X+
-
-	/**/
-	push r26
-	push r27
-	push T4
-	push T3
-	push T2
-	push T1
-	in r26, SPL
-	in r27, SPH
-	adiw r26, 1
-	dbg_hexdump 4
-	pop T1
-	pop T2
-	pop T3
-	pop T4
-	pop r27
-	pop r26
-	/**/
-
-	cpi LoopC, 16
-	brlt sha1_nextBlock_mainloop_core
-	/* update w[s] */
-	ldi tmp1, 2*4
-	rcall 1f
-	ldi tmp1, 8*4
-	rcall 1f
-	ldi tmp1, 13*4
-	rcall 1f
-	rjmp 2f
-1:		/* this might be "outsourced" to save the jump above */
-	add tmp1, S
-	andi tmp1, 0x3f
-	movw r26, W1
-	add r26, tmp1
-	adc r27, xNULL
-	ld tmp2, X+
-	eor T1, tmp2
-	ld tmp2, X+
-	eor T2, tmp2
-	ld tmp2, X+
-	eor T3, tmp2
-	ld tmp2, X+
-	eor T4, tmp2
-	ret
-2:	/* now we just hav to do a ROTL(T) and save T back */
-	mov tmp2, T4
-	rol tmp2
-	rol T1
-	rol T2
-	rol T3
-	rol T4
-	movw r26, W1
-	add r26, S
-	adc r27, xNULL
-	st X+, T1
-	st X+, T2
-	st X+, T3
-	st X+, T4
-	
-sha1_nextBlock_mainloop_core:	/* ther core function; T=ROTL5(a) ....*/	
-								/* T already contains w[s] */
-	movw r26, W1
-	sbiw r26, 4*1		/* X points at a[4] aka e */
-	ld tmp1, X+ 
-	add T1, tmp1
-	ld tmp1, X+ 
-	adc T2, tmp1
-	ld tmp1, X+ 
-	adc T3, tmp1
-	ld tmp1, X+ 
-	adc T4, tmp1		/* T = w[s]+e */
-	sbiw r26, 4*5		/* X points at a[0] aka a */
-	ld F1, X+ 
-	ld F2, X+ 
-	ld F3, X+ 
-	ld F4, X+ 
-	mov tmp1, F4		/* X points at a[1] aka b */
-	ldi tmp2, 5
-1:
-	rol tmp1
-	rol F1
-	rol F2
-	rol F3
-	rol F4
-	dec tmp2
-	brne 1b
-	
-	add T1, F1
-	adc T2, F2
-	adc T3, F3
-	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
-	
-	/* now we have to do this fucking conditional stuff */
-	ldi r30, lo8(sha1_nextBlock_xTable)
-	ldi r31, hi8(sha1_nextBlock_xTable)
-	add r30, xtmp
-	adc r31, xNULL
-	lpm tmp1, Z
-	cp tmp1, LoopC
-	brne 1f
-	inc xtmp
-1:	ldi r30, lo8(sha1_nextBlock_KTable)
-	ldi r31, hi8(sha1_nextBlock_KTable)
-	lsl xtmp
-	lsl xtmp
-	add r30, xtmp
-	adc r31, xNULL
-	lsr xtmp
-	lsr xtmp
-	 
-	lpm tmp1, Z+
-	add T1, tmp1
-	lpm tmp1, Z+
-	adc T2, tmp1
-	lpm tmp1, Z+
-	adc T3, tmp1
-	lpm tmp1, Z+
-	adc T4, tmp1
-			/* T = ROTL(a,5) + e + kt + w[s] */
-	
-	/* Z-4 is just pointing to kt ... */
-	movw r28, r26 /* copy X in Y */
-	adiw r30, 3*4 /* now Z points to the rigth locatin in our jump-vector-table */
-	lsr r31
-	ror r30
-		
-	icall
-	mov F1, tmp1
-	icall
-	mov F2, tmp1
-	icall
-	mov F3, tmp1
-	icall
-	
-	add T1, F1
-	adc T2, F2
-	adc T3, F3
-	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
-				 /* X points still at a[1] aka b, Y points at a[2] aka c */	
-	/* update a[] */
-sha1_nextBlock_update_a:
-	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
-	//adiw r28, 3*4  /* Y should point at a[4] aka e */
-	movw r28, W1
-	sbiw r28, 4
-	
-	ldi tmp2, 4*4 
-1:	
-	ld tmp1, -Y
-	std Y+4, tmp1
-	dec tmp2
-	brne 1b
-	/* Y points at a[0] aka a*/
-	
-	movw r28, W1
-	sbiw r28, 5*4
-	/* store T in a[0] aka a */
-	st Y+, T1
-	st Y+, T2
-	st Y+, T3
-	st Y+, T4
-	/* Y points at a[1] aka b*/
-	
-	/* rotate c */
-	ldd T1, Y+1*4
-	ldd T2, Y+1*4+1
-	ldd T3, Y+1*4+2
-	ldd T4, Y+1*4+3
-	mov tmp1, T1
-	ldi tmp2, 2
-1:	ror tmp1
-	ror T4
-	ror T3
-	ror T2
-	ror T1
-	dec tmp2
-	brne 1b
-	std Y+1*4+0, T1
-	std Y+1*4+1, T2
-	std Y+1*4+2, T3
-	std Y+1*4+3, T4
-	
-	push r27
-	push r26
-	movw r26, W1
-	sbiw r26, 4*5
-	dbg_hexdump 4*5
-	pop r26
-	pop r27
-	
-	inc LoopC
-	cpi LoopC, 80
-	brge 1f
-	rjmp sha1_nextBlock_mainloop
-/**************************************/
-1:	
-   /* littel patch */
-	sbiw r28, 4
-
-/* add a[] to state and inc length */	
-	pop r27
-	pop r26		/* now X points to state (and Y still at a[0]) */
-	ldi tmp4, 5
-1:	clc
-	ldi tmp3, 4
-2:	ld tmp1, X
-	ld tmp2, Y+
-	adc tmp1, tmp2
-	st X+, tmp1
-	dec tmp3
-	brne 2b
-	dec tmp4
-	brne 1b
-	
-	/* now length += 512 */
-	adiw r26, 1 /* we skip the least significant byte */
-	ld tmp1, X
-	ldi tmp2, hi8(512) /* 2 */
-	add tmp1, tmp2
-	st X+, tmp1
-	ldi tmp2, 6
-1:
-	ld tmp1, X
-	adc tmp1, xNULL
-	st X+, tmp1
-	dec tmp2
-	brne 1b
-	
-; EPILOG
-sha1_nextBlock_epilog:
-/* now we should clean up the stack */
-	pop r21
-	pop r20
-	in r0, SREG
-	cli ; we want to be uninterrupted while updating SP
-	out SPL, r20
-	out SPH, r21
-	out SREG, r0
-	
-	clr r1
-	pop r29
-	pop r28
-	pop r17
-	pop r16
-	pop r15
-	pop r14
-	pop r13
-	pop r12
-	pop r11
-	pop r10
-	ret
-
-sha1_nextBlock_xTable:
-.byte 20,40,60,0
-sha1_nextBlock_KTable:
-.int	0x5a827999 
-.int	0x6ed9eba1 
-.int	0x8f1bbcdc 
-.int	0xca62c1d6
-sha1_nextBlock_JumpTable:
-rjmp sha1_nextBlock_Ch
-	nop	
-rjmp sha1_nextBlock_Parity
-	nop
-rjmp sha1_nextBlock_Maj
-	nop
-rjmp sha1_nextBlock_Parity
-
-	 /* X and Y still point at a[1] aka b ; return value in tmp1 */
-sha1_nextBlock_Ch:
-	ld tmp1, Y+
-	mov tmp2, tmp1
-	com tmp2
-	ldd tmp3, Y+3	/* load from c */
-	and tmp1, tmp3
-	ldd tmp3, Y+7	/* load from d */
-	and tmp2, tmp3
-	eor tmp1, tmp2
-	ret
-	
-sha1_nextBlock_Maj:
-	ld tmp1, Y+
-	mov tmp2, tmp1
-	ldd tmp3, Y+3	/* load from c */
-	and tmp1, tmp3
-	ldd tmp4, Y+7	/* load from d */
-	and tmp2, tmp4
-	eor tmp1, tmp2
-	and tmp3, tmp4
-	eor tmp1, tmp3
-	ret
-
-sha1_nextBlock_Parity:
-	ld tmp1, Y+
-	ldd tmp2, Y+3	/* load from c */
-	eor tmp1, tmp2
-	ldd tmp2, Y+7	/* load from d */
-	eor tmp1, tmp2
-	ret
-/*	
-ch_str:			.asciz "\r\nCh"
-maj_str:		.asciz "\r\nMaj"
-parity_str:	.asciz "\r\nParity"
-*/
-;###########################################################	
-
-.global sha1_init 
-;void sha1_init(sha1_ctx_t *state){
-;	DEBUG_S("\r\nSHA1_INIT");
-;	state->h[0] = 0x67452301;
-;	state->h[1] = 0xefcdab89;
-;	state->h[2] = 0x98badcfe;
-;	state->h[3] = 0x10325476;
-;	state->h[4] = 0xc3d2e1f0;
-;	state->length = 0;
-;}
-; param1: (Func3,r24) 16-bit pointer to sha1_ctx_t struct in ram
-; modifys: Z(r30,r31), Func1, r22
-sha1_init:
-	movw r26, r24 ; (24,25) --> (26,27) load X with param1
-	ldi r30, lo8((sha1_init_vector))
-	ldi r31, hi8((sha1_init_vector))
-	ldi r22, 5*4 /* bytes to copy */
-sha1_init_vloop:	
-	lpm r23, Z+ 
-	st X+, r23
-	dec r22
-	brne sha1_init_vloop
-	ldi r22, 8
-sha1_init_lloop:
-	st X+, r1
-	dec r22
-	brne sha1_init_lloop
-	ret
-	
-sha1_init_vector:
-.int 0x67452301;
-.int 0xefcdab89;
-.int 0x98badcfe;
-.int 0x10325476;
-.int 0xc3d2e1f0;
-
diff --git a/sha1.c b/sha1.c
deleted file mode 100644
index 97cde32..0000000
--- a/sha1.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/* sha1.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	sha1.c
- * \author	Daniel Otte
- * \date	2006-10-08
- * \license GPLv3 or later
- * \brief SHA-1 implementation.
- * 
- */
- 
-#include <string.h> /* memcpy & co */
-#include <stdint.h>
-#include "config.h"
-#undef DEBUG
-#include "debug.h"
-#include "sha1.h"
-
-#define LITTLE_ENDIAN
-
-/********************************************************************************************************/
- 
-/**
- * \brief initialises given SHA-1 context
- * 
- */
-void sha1_init(sha1_ctx_t *state){
-	DEBUG_S("\r\nSHA1_INIT");
-	state->h[0] = 0x67452301;
-	state->h[1] = 0xefcdab89;
-	state->h[2] = 0x98badcfe;
-	state->h[3] = 0x10325476;
-	state->h[4] = 0xc3d2e1f0;
-	state->length = 0;
-}
-
-/********************************************************************************************************/
-/* some helping functions */
-uint32_t rotl32(uint32_t n, uint8_t bits){
-	return ((n<<bits) | (n>>(32-bits)));
-}
-
-uint32_t change_endian32(uint32_t x){
-	return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
-}
-
-
-/* three SHA-1 inner functions */
-uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
-	DEBUG_S("\r\nCH");
-	return ((x&y)^((~x)&z));
-}
-
-uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
-	DEBUG_S("\r\nMAJ");
-	return ((x&y)^(x&z)^(y&z));
-}
-
-uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
-	DEBUG_S("\r\nPARITY");
-	return ((x^y)^z);
-}
-
-/********************************************************************************************************/
-/**
- * \brief "add" a block to the hash
- * This is the core function of the hash algorithm. To understand how it's working
- * and what thoese variables do, take a look at FIPS-182. This is an "alternativ" implementation 
- */
-
-#define MASK 0x0000000f 
-
-typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);
-
-void sha1_nextBlock (sha1_ctx_t *state, const void* block){
-	uint32_t a[5];
-	uint32_t w[16];
-	uint32_t temp;
-	uint8_t t,s;
-	pf_t f[] = {ch,parity,maj,parity};
-	uint32_t k[4]={	0x5a827999, 
-					0x6ed9eba1, 
-					0x8f1bbcdc, 
-					0xca62c1d6};
-	
-	/* load the w array (changing the endian and so) */
-	for(t=0; t<16; ++t){
-		w[t] = change_endian32(((uint32_t*)block)[t]);
-	}
-
-	uint8_t dbgi;
-	for(dbgi=0; dbgi<16; ++dbgi){
-		DEBUG_S("\n\rBlock:");
-		DEBUG_B(dbgi);
-		DEBUG_C(':');
-		#ifdef DEBUG
-			cli_hexdump(&(w[dbgi]) ,4);
-		#endif
-	}
-	
-	
-	/* load the state */
-	memcpy(a, state->h, 5*sizeof(uint32_t));
-	
-	
-	/* the fun stuff */
-	for(t=0; t<=79; ++t){
-		s = t & MASK;
-		if(t>=16){
-			#ifdef DEBUG
-			 DEBUG_S("\r\n ws = "); cli_hexdump(&ws, 4);
-			#endif
-			w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^ 
-				 w[(s+ 2)&MASK] ^ w[s] ,1);			
-			#ifdef DEBUG
-			 DEBUG_S(" --> ws = "); cli_hexdump(&(w[s]), 4);
-			#endif
-		}
-		
-		uint32_t dtemp;
-		temp = rotl32(a[0],5) + (dtemp=f[t/20](a[1],a[2],a[3])) + a[4] + k[t/20] + w[s];
-		memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
-		a[0] = temp;
-		a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
-		
-		/* debug dump */
-		DEBUG_S("\r\nt = "); DEBUG_B(t);
-		DEBUG_S("; a[]: ");
-		#ifdef DEBUG
-		 cli_hexdump(a, 5*4);
-		#endif
-		DEBUG_S("; k = ");
-		#ifdef DEBUG
-		 cli_hexdump(&(k[t/20]), 4);
-		#endif
-		DEBUG_S("; f(b,c,d) = ");
-		#ifdef DEBUG
-		 cli_hexdump(&dtemp, 4);
-		#endif
-	}
-	
-	/* update the state */
-	for(t=0; t<5; ++t){
-		state->h[t] += a[t];
-	}
-	state->length += 512;
-}
-
-/********************************************************************************************************/
-
-void sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length){
-	uint8_t lb[SHA1_BLOCK_BITS/8]; /* local block */
-	state->length += length;
-	memcpy (&(lb[0]), block, length/8);
-	
-	/* set the final one bit */
-	if (length & 0x7){ /* if we have single bits at the end */
-		lb[length/8] = ((uint8_t*)(block))[length/8];
-	} else {
-		lb[length/8] = 0;
-	}
-	lb[length/8] |= 0x80>>(length & 0x3);
-	length =(length >> 7) + 1; /* from now on length contains the number of BYTES in lb*/
-	/* pad with zeros */
-	if (length>64-8){ /* not enouth space for 64bit length value */
-		memset((void*)(&(lb[length])), 0, 64-length);
-		sha1_nextBlock(state, lb);
-		state->length -= 512;
-		length = 0;	
-	}
-	memset((void*)(&(lb[length])), 0, 56-length);
-	/* store the 64bit length value */
-#if defined LITTLE_ENDIAN
-	 	/* this is now rolled up */
-	uint8_t i; 	
-	for (i=1; i<=8; ++i){
-		lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
-	}
-#elif defined BIG_ENDIAN
-	*((uint64_t)&(lb[56])) = state->length;
-#endif
-	sha1_nextBlock(state, lb);
-}
-
-/********************************************************************************************************/
-
-void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state){
-#if defined LITTLE_ENDIAN
-	uint8_t i;
-	for(i=0; i<8; ++i){
-		((uint32_t*)dest)[i] = change_endian32(state->h[i]);
-	}
-#elif BIG_ENDIAN
-	if (dest != state->h)
-		memcpy(dest, state->h, SHA256_HASH_BITS/8);
-#else
-# error unsupported endian type!
-#endif
-}
-
-/********************************************************************************************************/
-/**
- * 
- * 
- */
-void sha1 (sha1_hash_t *dest, const void* msg, uint32_t length){
-	sha1_ctx_t s;
-	DEBUG_S("\r\nBLA BLUB");
-	sha1_init(&s);
-	while(length & (~0x0001ff)){ /* length>=512 */
-		DEBUG_S("\r\none block");
-		sha1_nextBlock(&s, msg);
-		msg = (uint8_t*)msg + SHA1_BLOCK_BITS/8; /* increment pointer to next block */
-		length -= SHA1_BLOCK_BITS;
-	}
-	sha1_lastBlock(&s, msg, length);
-	sha1_ctx2hash(dest, &s);
-}
-
-
diff --git a/sha1.h b/sha1.h
deleted file mode 100644
index 6675d20..0000000
--- a/sha1.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* sha1.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	sha1.h
- * \author	Daniel Otte
- * \email   daniel.otte@rub.de
- * \date	2006-10-08
- * \license GPLv3 or later
- * \brief   SHA-1 declaration.
- * \ingroup SHA-1
- * 
- */
- 
-#ifndef SHA1_H_
-#define SHA1_H_
-
-#include <stdint.h>
-/** \def SHA1_HASH_BITS
- * definees the size of a SHA-1 hash in bits 
- */
-
-/** \def SHA1_HASH_BYTES
- * definees the size of a SHA-1 hash in bytes 
- */
-
-/** \def SHA1_BLOCK_BITS
- * definees the size of a SHA-1 input block in bits 
- */
-
-/** \def SHA1_BLOCK_BYTES
- * definees the size of a SHA-1 input block in bytes 
- */
-#define SHA1_HASH_BITS  160
-#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
-#define SHA1_BLOCK_BITS 512
-#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
-
-/** \typedef sha1_ctx_t
- * \brief SHA-1 context type
- * 
- * A vatiable of this type may hold the state of a SHA-1 hashing process
- */
-typedef struct {
-	uint32_t h[5];
-	uint64_t length;
-} sha1_ctx_t;
-
-/** \typedef sha1_hash_t
- * \brief hash value type
- * A variable of this type may hold a SHA-1 hash value 
- */
-typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
-
-/** \fn sha1_init(sha1_ctx_t *state)
- * \brief initializes a SHA-1 context
- * This function sets a ::sha1_ctx_t variable to the initialization vector
- * for SHA-1 hashing.
- * \param state pointer to the SHA-1 context variable
- */
-void sha1_init(sha1_ctx_t *state);
-
-/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
- *  \brief process one input block
- * This function processes one input block and updates the hash context 
- * accordingly
- * \param state pointer to the state variable to update
- * \param block pointer to the message block to process
- */
-void sha1_nextBlock (sha1_ctx_t *state, const void* block);
-
-/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
- * \brief processes the given block and finalizes the context
- * This function processes the last block in a SHA-1 hashing process.
- * The block should have a maximum length of a single input block.
- * \param state pointer to the state variable to update and finalize
- * \param block pointer to themessage block to process
- * \param length_b length of the message block in bits  
- */
-void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
-
-/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
- * \brief convert a state variable into an actual hash value
- * Writes the hash value corresponding to the state to the memory pointed by dest.
- * \param dest pointer to the hash value destination
- * \param state pointer to the hash context
- */ 
-void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
-
-/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
- * \brief hashing a message which in located entirely in RAM
- * This function automatically hashes a message which is entirely in RAM with
- * the SHA-1 hashing algorithm.
- * \param dest pointer to the hash value destination
- * \param msg  pointer to the message which should be hashed
- * \param length_b length of the message in bits
- */ 
-void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
-
-
-
-#endif /*SHA1_H_*/
diff --git a/sha1/sha1-asm.S b/sha1/sha1-asm.S
new file mode 100644
index 0000000..f571685
--- /dev/null
+++ b/sha1/sha1-asm.S
@@ -0,0 +1,886 @@
+/* sha1-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS = 160
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 on the stack, then clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1
+.endm
+
+.macro postcall
+	pop r31	/* restore everything pushed by precall, in reverse push order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X (address of the data to dump) across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* r25:r24 = X, the address handed to uart_hexdump */
+.if \length > 16
+	ldi r22, lo8(16)	/* r23:r22 = 16: dump one chunk, then expand again for the rest */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)	/* 16 bytes or fewer left: dump them with a single call */
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/*	disabled busy-wait: two nested loops counting r0 and r1 down through 256 steps each
+	push r0
+	push r1
+	clr r0
+1:	clr r1
+2:	dec r1
+	brne 2b
+	dec r0
+	brne 1b
+	pop r1
+	pop r0  // */
+.endm
+
+/* X points to Block; the macro expands to nothing while its body stays commented out */
+.macro dbg_hexdump length
+/*	
+	precall
+	hexdump \length
+	postcall
+	// */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha1_ctx structure
+;	given in r23,r22
+sha1_ctx2hash:
+	movw r26, r22	; X = state
+	movw r30, r24	; Z = destination
+	ldi r21, 5	; r21 counts the five 32-bit words
+	sbiw r26, 4	; bias X so that the adiw below lands one past the first word
+1:	
+	ldi r20, 4	; r20 counts the four bytes of one word
+	adiw r26, 8	; X = one past the last byte of the next word
+2:	
+		ld r0, -X	; read the word from its last byte down to its first ...
+		st Z+, r0	; ... and store upwards, so each word is written byte-reversed
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 5*4+8 	/* reserve room for a sha1_ctx_t (5*4+8 bytes) on the stack */
+	sbci r17, 0	
+	in r0, SREG
+	cli
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25	/* keep the destination pointer for sha1_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = SP+1 = address of the context; a 16-bit add is */
+	sbci r17, hi8(-1)	/* needed because INC does not produce a carry for r17 */
+	
+	movw r8, r18		/* backup of length (r11:r10:r9:r8) */
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr (r13:r12) */
+	
+	movw r24, r16
+	rcall sha1_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f		/* a bit set in byte 3 or byte 2 means at least one full block is left */
+	tst r10
+	brne 2f
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f		/* fewer than 512 bits left: continue with the final block */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64	/* advance the saved message pointer by one block (64 bytes) */
+	add r12, r19
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8	/* remaining length (< 512 bits) */
+	rcall sha1_lastBlock
+	
+	pop r24		/* destination pointer pushed in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash	
+	
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8 	/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the block to hash
+;	given in r23,r22
+;  param3: a 16-bit integer specifying the length of the block in bits
+;	given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+sha1_lastBlock:
+	cpi r21, 0x02
+	brlo sha1_lastBlock_prolog	/* fewer than 512 bits: do the padding */
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha1_nextBlock	/* 512 bits or more: hash one full block first */
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 2		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* block pointer += 64 bytes (one block) */
+	sbci r23, hi8(-64)
+	rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	subi r30, lo8(64)
+	sbci r31, hi8(64) /* hi8(64) is 0, this only propagates the borrow */
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* Z = SP+1 = first byte of the local block */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length (256 bits) ... */
+	bld r18, 5  /* ... is bit 5 of the byte length (32 bytes); now: r18 == length/8 */
+	
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha1_lastBlock_post_copy
+	mov r1, r18	/* r1 is the copy counter and ends up zero again */
+sha1_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:	
+sha1_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* no masking is done here: the unused low bits of the partial byte must be zero */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18		/* r18 = number of bytes used in the local block */
+
+/* does the 64 bit length still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha1_lastBlock_insert_zeros
+0:
+	/* no: an additional block is needed */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63	/* Z back to the begin of the local block */
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* now we should subtract 512 from length */
+	movw r26, r24
+	adiw r26, 4*5+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not necessary, the loop above leaves r18 at zero */
+	/* Z was restored to the begin of the block by the pops above */
+
+sha1_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha1_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 5*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* state.length + length, stored from the last byte backwards */
+	st -Z, r0
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8	/* Z back to the begin of the local block */
+	movw r22, r30
+	rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; lo8(64)
+	adiw r30,  1  ; hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the 64 byte block to hash
+;	given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit words for w[] and 5 for a[] (84 bytes) plus one spare word = 88 bytes
+
+xtmp = 0	; r0: index t/20 used for the K table and the jump table
+xNULL = 1	; r1: used as a constant zero
+W1 = 10	; r11:r10 hold the address of w[0]
+W2 = 11
+T1	= 12	; r15..r12: the 32-bit accumulator T
+T2	= 13
+T3	= 14
+T4	= 15
+LoopC = 16	; round counter t
+S	  = 17	; byte offset of w[t mod 16] inside w[]
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22	; r25..r22: second 32-bit work value (ROTL5(a), then f_t)
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; first, make room on the stack for the local variables
+ 			 /* replace push & pop by mem ops? */
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack /* maybe removeable? */ 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha1_nextBlock_localSpace)
+	movw r26, r20			; X = new SP, the local area starts at X+1
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	push r18
+	push r19 /* push old SP on new stack */
+	push r24
+	push r25 /* param1 will be needed later */
+	
+	/* load a[] with state */
+	movw 28, r24 /* load pointer to state in Y */
+	adiw r26, 1 ; X++
+
+	ldi LoopC, 5*4	
+1:	ld tmp1, Y+
+	st X+, tmp1
+	dec LoopC
+	brne 1b
+
+	movw W1, r26 /* save pointer to w[0] */
+	/* load w[] with endian fixed message */
+		/* we might also use the changeendian32() function at bottom */
+	movw r30, r22 /* mv param2 (pointer to msg) to Z */	
+	ldi LoopC, 16
+1:
+	ldd tmp1, Z+3
+	st X+, tmp1
+	ldd tmp1, Z+2
+	st X+, tmp1
+	ldd tmp1, Z+1
+	st X+, tmp1
+	ld tmp1, Z
+	st X+, tmp1
+	adiw r30, 4
+	dec LoopC
+	brne 1b
+	
+	;clr LoopC /* LoopC is named t in FIPS 180-2; it is already zero after the loop above */	
+	clr xtmp
+sha1_nextBlock_mainloop:
+	mov S, LoopC
+	lsl S
+	lsl S
+	andi S, 0x3C /* S is a bytepointer so *4 */
+	/* load w[s] */
+	movw r26, W1
+	add r26, S /* X points at w[s] */
+	adc r27, xNULL
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	ld T4, X+
+
+	/* debug hook: a copy of T is pushed and X is pointed at it for dbg_hexdump */
+	push r26
+	push r27
+	push T4
+	push T3
+	push T2
+	push T1
+	in r26, SPL
+	in r27, SPH
+	adiw r26, 1
+	dbg_hexdump 4
+	pop T1
+	pop T2
+	pop T3
+	pop T4
+	pop r27
+	pop r26
+	/**/
+
+	cpi LoopC, 16
+	brlt sha1_nextBlock_mainloop_core
+	/* update w[s]: T = w[s] ^ w[s+2] ^ w[s+8] ^ w[s+13] (indices mod 16) */
+	ldi tmp1, 2*4
+	rcall 1f
+	ldi tmp1, 8*4
+	rcall 1f
+	ldi tmp1, 13*4
+	rcall 1f
+	rjmp 2f
+1:		/* this might be "outsourced" to save the jump above */
+	add tmp1, S
+	andi tmp1, 0x3f	/* wrap the byte offset around the 64 byte w[] array */
+	movw r26, W1
+	add r26, tmp1
+	adc r27, xNULL
+	ld tmp2, X+
+	eor T1, tmp2
+	ld tmp2, X+
+	eor T2, tmp2
+	ld tmp2, X+
+	eor T3, tmp2
+	ld tmp2, X+
+	eor T4, tmp2
+	ret
+2:	/* now we just have to do a ROTL(T) and save T back */
+	mov tmp2, T4
+	rol tmp2	/* carry = top bit of T, it is rotated into bit 0 below */
+	rol T1
+	rol T2
+	rol T3
+	rol T4
+	movw r26, W1
+	add r26, S
+	adc r27, xNULL
+	st X+, T1
+	st X+, T2
+	st X+, T3
+	st X+, T4
+	
+sha1_nextBlock_mainloop_core:	/* the core function; T=ROTL5(a) ....*/	
+								/* T already contains w[s] */
+	movw r26, W1
+	sbiw r26, 4*1		/* X points at a[4] aka e */
+	ld tmp1, X+ 
+	add T1, tmp1
+	ld tmp1, X+ 
+	adc T2, tmp1
+	ld tmp1, X+ 
+	adc T3, tmp1
+	ld tmp1, X+ 
+	adc T4, tmp1		/* T = w[s]+e */
+	sbiw r26, 4*5		/* X points at a[0] aka a */
+	ld F1, X+ 
+	ld F2, X+ 
+	ld F3, X+ 
+	ld F4, X+ 
+	mov tmp1, F4		/* X points at a[1] aka b */
+	ldi tmp2, 5
+1:
+	rol tmp1	/* carry = current top bit of F, rotated into bit 0 below */
+	rol F1
+	rol F2
+	rol F3
+	rol F4
+	dec tmp2
+	brne 1b
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+	
+	/* now select k_t and f_t; xtmp is incremented when LoopC reaches 20, 40 and 60 */
+	ldi r30, lo8(sha1_nextBlock_xTable)
+	ldi r31, hi8(sha1_nextBlock_xTable)
+	add r30, xtmp
+	adc r31, xNULL
+	lpm tmp1, Z
+	cp tmp1, LoopC
+	brne 1f
+	inc xtmp
+1:	ldi r30, lo8(sha1_nextBlock_KTable)
+	ldi r31, hi8(sha1_nextBlock_KTable)
+	lsl xtmp	/* xtmp*4 = byte offset of the table entry */
+	lsl xtmp
+	add r30, xtmp
+	adc r31, xNULL
+	lsr xtmp	/* restore xtmp */
+	lsr xtmp
+	 
+	lpm tmp1, Z+
+	add T1, tmp1
+	lpm tmp1, Z+
+	adc T2, tmp1
+	lpm tmp1, Z+
+	adc T3, tmp1
+	lpm tmp1, Z+
+	adc T4, tmp1
+			/* T = ROTL(a,5) + e + kt + w[s] */
+	
+	/* Z-4 is just pointing to kt ... */
+	movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+	lsr r31		/* icall takes a word address: halve the byte address */
+	ror r30
+		
+	icall		/* each call computes one byte of f_t(b,c,d) in tmp1 and advances Y */
+	mov F1, tmp1
+	icall
+	mov F2, tmp1
+	icall
+	mov F3, tmp1
+	icall
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+				 /* X points still at a[1] aka b, Y points at a[2] aka c */	
+	/* update a[] */
+sha1_nextBlock_update_a:
+	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+	//adiw r28, 3*4  /* Y should point at a[4] aka e */
+	movw r28, W1
+	sbiw r28, 4
+	
+	ldi tmp2, 4*4 
+1:	
+	ld tmp1, -Y
+	std Y+4, tmp1
+	dec tmp2
+	brne 1b
+	/* Y points at a[0] aka a*/
+	
+	movw r28, W1
+	sbiw r28, 5*4
+	/* store T in a[0] aka a */
+	st Y+, T1
+	st Y+, T2
+	st Y+, T3
+	st Y+, T4
+	/* Y points at a[1] aka b*/
+	
+	/* rotate c right by 2 bits (the same as ROTL30) */
+	ldd T1, Y+1*4
+	ldd T2, Y+1*4+1
+	ldd T3, Y+1*4+2
+	ldd T4, Y+1*4+3
+	mov tmp1, T1
+	ldi tmp2, 2
+1:	ror tmp1	/* carry = current bit 0, rotated into the top bit below */
+	ror T4
+	ror T3
+	ror T2
+	ror T1
+	dec tmp2
+	brne 1b
+	std Y+1*4+0, T1
+	std Y+1*4+1, T2
+	std Y+1*4+2, T3
+	std Y+1*4+3, T4
+	
+	push r27	/* debug hook: point X at a[] for dbg_hexdump */
+	push r26
+	movw r26, W1
+	sbiw r26, 4*5
+	dbg_hexdump 4*5
+	pop r26
+	pop r27
+	
+	inc LoopC
+	cpi LoopC, 80
+	brge 1f
+	rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:	
+   /* little patch: move Y from a[1] back to a[0] */
+	sbiw r28, 4
+
+/* add a[] to state and inc length */	
+	pop r27
+	pop r26		/* now X points to state (and Y still at a[0]) */
+	ldi tmp4, 5
+1:	clc		/* every 32-bit word is added separately, without carry in */
+	ldi tmp3, 4
+2:	ld tmp1, X
+	ld tmp2, Y+
+	adc tmp1, tmp2
+	st X+, tmp1
+	dec tmp3
+	brne 2b
+	dec tmp4
+	brne 1b
+	
+	/* now length += 512 */
+	adiw r26, 1 /* we skip the least significant byte */
+	ld tmp1, X
+	ldi tmp2, hi8(512) /* 2 */
+	add tmp1, tmp2
+	st X+, tmp1
+	ldi tmp2, 6
+1:
+	ld tmp1, X
+	adc tmp1, xNULL
+	st X+, tmp1
+	dec tmp2
+	brne 1b
+	
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+	pop r21		/* old SP, pushed in the prolog */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	ret
+
+sha1_nextBlock_xTable:
+.byte 20,40,60,0	/* values of LoopC at which xtmp is incremented; the 0 is never matched again */
+sha1_nextBlock_KTable:
+.int	0x5a827999 	/* one constant per group of 20 rounds, selected by xtmp */
+.int	0x6ed9eba1 
+.int	0x8f1bbcdc 
+.int	0xca62c1d6
+sha1_nextBlock_JumpTable:
+rjmp sha1_nextBlock_Ch	/* reached through icall with Z = KTable + 4*xtmp + 16 */
+	nop	/* the nop pads every entry to 4 bytes, the stride of KTable */
+rjmp sha1_nextBlock_Parity
+	nop
+rjmp sha1_nextBlock_Maj
+	nop
+rjmp sha1_nextBlock_Parity
+
+	 /* Y points at the next byte of a[1] aka b and is advanced by one; return value in tmp1 */
+sha1_nextBlock_Ch:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	com tmp2
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3	/* b & c */
+	ldd tmp3, Y+7	/* load from d */
+	and tmp2, tmp3	/* ~b & d */
+	eor tmp1, tmp2	/* Ch = (b & c) ^ (~b & d) */
+	ret
+	
+sha1_nextBlock_Maj:	/* one byte of Maj(b,c,d) in tmp1; Y is advanced by one */
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3	/* b & c */
+	ldd tmp4, Y+7	/* load from d */
+	and tmp2, tmp4	/* b & d */
+	eor tmp1, tmp2
+	and tmp3, tmp4	/* c & d */
+	eor tmp1, tmp3	/* Maj = (b & c) ^ (b & d) ^ (c & d) */
+	ret
+
+sha1_nextBlock_Parity:	/* one byte of Parity(b,c,d) in tmp1; Y is advanced by one */
+	ld tmp1, Y+
+	ldd tmp2, Y+3	/* load from c */
+	eor tmp1, tmp2
+	ldd tmp2, Y+7	/* load from d */
+	eor tmp1, tmp2	/* Parity = b ^ c ^ d */
+	ret
+/*	
+ch_str:			.asciz "\r\nCh"
+maj_str:		.asciz "\r\nMaj"
+parity_str:	.asciz "\r\nParity"
+*/
+;###########################################################	
+
+.global sha1_init 
+;void sha1_init(sha1_ctx_t *state){
+;	DEBUG_S("\r\nSHA1_INIT");
+;	state->h[0] = 0x67452301;
+;	state->h[1] = 0xefcdab89;
+;	state->h[2] = 0x98badcfe;
+;	state->h[3] = 0x10325476;
+;	state->h[4] = 0xc3d2e1f0;
+;	state->length = 0;
+;}
+; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23; r1 is stored as the zero byte
+sha1_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha1_init_vector))
+	ldi r31, hi8((sha1_init_vector))
+	ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:	
+	lpm r23, Z+ 	/* the initial values are read from program memory */
+	st X+, r23
+	dec r22
+	brne sha1_init_vloop
+	ldi r22, 8	/* the 8 bytes of the length field follow the five words */
+sha1_init_lloop:
+	st X+, r1
+	dec r22
+	brne sha1_init_lloop
+	ret
+	
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/sha1/sha1.c b/sha1/sha1.c
new file mode 100644
index 0000000..97cde32
--- /dev/null
+++ b/sha1/sha1.c
@@ -0,0 +1,236 @@
+/* sha1.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha1.c
+ * \author	Daniel Otte
+ * \date	2006-10-08
+ * \license GPLv3 or later
+ * \brief SHA-1 implementation.
+ * 
+ */
+ 
+#include <string.h> /* memcpy & co */
+#include <stdint.h>
+#include "config.h"
+#undef DEBUG
+#include "debug.h"
+#include "sha1.h"
+
+#define LITTLE_ENDIAN
+
+/********************************************************************************************************/
+ 
+/**
+ * \brief loads the SHA-1 initial hash value into state and clears its bit counter
+ */
+void sha1_init(sha1_ctx_t *state){
+	uint32_t *word = state->h; /* walks over the five chaining words */
+	DEBUG_S("\r\nSHA1_INIT");
+	*word++ = 0x67452301;
+	*word++ = 0xefcdab89;
+	*word++ = 0x98badcfe;
+	*word++ = 0x10325476;
+	*word   = 0xc3d2e1f0;
+	state->length = 0;
+}
+
+/********************************************************************************************************/
+/* some helping functions */
+uint32_t rotl32(uint32_t n, uint8_t bits){ /* rotate n left by (bits mod 32) bit positions */
+	return ((n<<(bits&31)) | (n>>((32-(bits&31))&31))); /* masked: a shift by 32 is undefined */
+}
+
+uint32_t change_endian32(uint32_t x){ /* returns x with the order of its four bytes reversed */
+	return ((x>>24) | ((x>>8)&0x0000ff00) | ((x<<8)&0x00ff0000) | (x<<24));
+}
+
+
+/* three SHA-1 inner functions */
+uint32_t ch(uint32_t x, uint32_t y, uint32_t z){ /* choose: bit of y where x is 1, bit of z where x is 0 */
+	DEBUG_S("\r\nCH");
+	return (z^(x&(y^z)));
+}
+
+uint32_t maj(uint32_t x, uint32_t y, uint32_t z){ /* majority: each bit takes the value of at least two inputs */
+	DEBUG_S("\r\nMAJ");
+	return ((x&y)|(z&(x|y)));
+}
+
+uint32_t parity(uint32_t x, uint32_t y, uint32_t z){ /* bitwise exclusive or of all three inputs */
+	DEBUG_S("\r\nPARITY");
+	return (x^(y^z));
+}
+
+/********************************************************************************************************/
+/**
+ * \brief "add" a block to the hash
+ * This is the core function of the hash algorithm. To understand how it works
+ * and what those variables do, take a look at FIPS 180-2. This is an "alternative" implementation 
+ */
+
+#define MASK 0x0000000f 
+
+typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);
+
+void sha1_nextBlock (sha1_ctx_t *state, const void* block){
+	uint32_t a[5];  /* working variables a..e */
+	uint32_t w[16]; /* sliding 16 word window of the message schedule */
+	uint32_t temp;
+	uint8_t t,s;    /* t: round number, s: index of w[t] inside the window */
+	pf_t f[] = {ch,parity,maj,parity}; /* f_t, one per group of 20 rounds */
+	uint32_t k[4]={	0x5a827999, 
+					0x6ed9eba1, 
+					0x8f1bbcdc, 
+					0xca62c1d6};
+	
+	/* load the w array (changing the endian and so) */
+	for(t=0; t<16; ++t){
+		w[t] = change_endian32(((const uint32_t*)block)[t]);
+	}
+
+	uint8_t dbgi;
+	for(dbgi=0; dbgi<16; ++dbgi){
+		DEBUG_S("\n\rBlock:");
+		DEBUG_B(dbgi);
+		DEBUG_C(':');
+		#ifdef DEBUG
+			cli_hexdump(&(w[dbgi]) ,4);
+		#endif
+	}
+	
+	
+	/* load the state */
+	memcpy(a, state->h, 5*sizeof(uint32_t));
+	
+	
+	/* the 80 rounds */
+	for(t=0; t<=79; ++t){
+		s = t & MASK;
+		if(t>=16){
+			#ifdef DEBUG
+			 DEBUG_S("\r\n ws = "); cli_hexdump(&(w[s]), 4);
+			#endif
+			w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^ 
+				 w[(s+ 2)&MASK] ^ w[s] ,1);			
+			#ifdef DEBUG
+			 DEBUG_S(" --> ws = "); cli_hexdump(&(w[s]), 4);
+			#endif
+		}
+		
+		uint32_t dtemp; /* f(b,c,d), only kept for the debug dump below */
+		temp = rotl32(a[0],5) + (dtemp=f[t/20](a[1],a[2],a[3])) + a[4] + k[t/20] + w[s];
+		memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
+		a[0] = temp;
+		a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
+		
+		/* debug dump */
+		DEBUG_S("\r\nt = "); DEBUG_B(t);
+		DEBUG_S("; a[]: ");
+		#ifdef DEBUG
+		 cli_hexdump(a, 5*4);
+		#endif
+		DEBUG_S("; k = ");
+		#ifdef DEBUG
+		 cli_hexdump(&(k[t/20]), 4);
+		#endif
+		DEBUG_S("; f(b,c,d) = ");
+		#ifdef DEBUG
+		 cli_hexdump(&dtemp, 4);
+		#endif
+	}
+	
+	/* update the state */
+	for(t=0; t<5; ++t){
+		state->h[t] += a[t];
+	}
+	state->length += 512; /* one more block of message bits has been hashed */
+}
+
+/********************************************************************************************************/
+
+void sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length){ /* pads and hashes the message tail; length is in bits */
+	uint8_t lb[SHA1_BLOCK_BITS/8]; /* local block: message tail followed by the padding */
+	uint8_t n;                     /* number of bytes of lb that are already filled */
+	while (length >= SHA1_BLOCK_BITS){ /* hash leading full blocks, so lb can not overflow */
+		sha1_nextBlock(state, block);
+		block = (const uint8_t*)block + SHA1_BLOCK_BITS/8;
+		length -= SHA1_BLOCK_BITS;
+	}
+	state->length += length; /* now the total message length in bits */
+	n = length/8;
+	memcpy (&(lb[0]), block, n);
+	/* set the final one bit; the bits behind the message in a partial byte must be zero in block */
+	lb[n] = (length & 0x7) ? ((const uint8_t*)(block))[n] : 0;
+	lb[n] |= 0x80>>(length & 0x7);
+	++n; /* from now on n contains the number of BYTES in lb */
+	if (n>64-8){ /* not enough space for the 64 bit length value */
+		memset((void*)(&(lb[n])), 0, 64-n);
+		sha1_nextBlock(state, lb);
+		state->length -= 512; /* sha1_nextBlock counted the padding block as message */
+		n = 0;
+	}
+	memset((void*)(&(lb[n])), 0, 56-n); /* pad with zeros up to the length field */
+	/* store the 64 bit length value, most significant byte first */
+#if defined LITTLE_ENDIAN
+	uint8_t i; 	
+	for (i=1; i<=8; ++i){
+		lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
+	}
+#elif defined BIG_ENDIAN
+	*((uint64_t*)&(lb[56])) = state->length;
+#endif
+	sha1_nextBlock(state, lb);
+}
+
+/********************************************************************************************************/
+
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state){ /* writes the 20 byte hash value of state to dest */
+#if defined LITTLE_ENDIAN
+	uint8_t i; /* the hash is the five state words, each stored most significant byte first */
+	for(i=0; i<5; ++i){
+		((uint32_t*)dest)[i] = change_endian32(state->h[i]);
+	}
+#elif defined BIG_ENDIAN
+	if ((void*)dest != (void*)state->h)
+		memcpy(dest, state->h, SHA1_HASH_BITS/8);
+#else
+# error unsupported endian type!
+#endif
+}
+
+/********************************************************************************************************/
+/**
+ * \brief hashes a complete message which is held in RAM
+ * dest receives the hash value, msg points to the message, length is its size in bits
+ */
+void sha1 (sha1_hash_t *dest, const void* msg, uint32_t length){
+	sha1_ctx_t ctx;
+	const uint8_t *pos = (const uint8_t*)msg; /* next message byte which is not hashed yet */
+	DEBUG_S("\r\nBLA BLUB");
+	sha1_init(&ctx);
+	for(; length >= SHA1_BLOCK_BITS; length -= SHA1_BLOCK_BITS){ /* all full blocks */
+		DEBUG_S("\r\none block");
+		sha1_nextBlock(&ctx, pos);
+		pos += SHA1_BLOCK_BITS/8;
+	}
+	sha1_lastBlock(&ctx, pos, length);
+	sha1_ctx2hash(dest, &ctx);
+}
+
+
diff --git a/sha1/sha1.h b/sha1/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/sha1/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha1.h
+ * \author	Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date	2006-10-08
+ * \license GPLv3 or later
+ * \brief   SHA-1 declaration.
+ * \ingroup SHA-1
+ * 
+ */
+ 
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits 
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes 
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits 
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes 
+ */
+#define SHA1_HASH_BITS  160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ * 
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+	uint32_t h[5];
+	uint64_t length;
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value 
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ *  \brief process one input block
+ * This function processes one input block and updates the hash context 
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits  
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context
+ */ 
+void sha1_ctx2hash (sha1_hash_t *dest, sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which is located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg  pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */ 
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/sha256-asm.S b/sha256-asm.S
deleted file mode 100644
index d9eb6b6..0000000
--- a/sha256-asm.S
+++ /dev/null
@@ -1,1042 +0,0 @@
-/* sha256-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * Author:	Daniel Otte
- *
- * License: GPLv3 or later
-*/
-; sha-256 implementation in assembler	
-SHA256_BLOCK_BITS = 512
-SHA256_HASH_BITS = 256
-
-.macro precall
-	/* push r18 - r27, r30 - r31*/
-	push r0
-	push r1
-	push r18
-	push r19
-	push r20
-	push r21
-	push r22
-	push r23
-	push r24
-	push r25
-	push r26
-	push r27
-	push r30
-	push r31
-	clr r1
-.endm
-
-.macro postcall
-	pop r31
-	pop r30
-	pop r27
-	pop r26
-	pop r25
-	pop r24
-	pop r23
-	pop r22
-	pop r21
-	pop r20
-	pop r19
-	pop r18
-	pop r1
-	pop r0
-.endm
-
-
-.macro hexdump length
-	push r27
-	push r26
-	ldi r25, '\r'
-	mov r24, r25
-	call uart_putc
-	ldi r25, '\n'
-	mov r24, r25
-	call uart_putc
-	pop r26
-	pop r27
-	movw r24, r26
-.if \length > 16
-	ldi r22, lo8(16)
-	ldi r23, hi8(16)
-	push r27
-	push r26
-	call uart_hexdump
-	pop r26
-	pop r27
-	adiw r26, 16
-	hexdump \length-16
-.else
-	ldi r22, lo8(\length)
-	ldi r23, hi8(\length)
-	call uart_hexdump
-.endif
-.endm
-
-/* X points to Block */
-.macro dbg_hexdump length
-	precall
-	hexdump \length
-	postcall
-.endm
-
-.section .text
-
-SPL = 0x3D
-SPH = 0x3E
-SREG = 0x3F
-
-
-;
-;sha256_ctx_t is:
-;
-; [h0][h1][h2][h3][h4][h5][h6][h7][length]
-; hn is 32 bit large, length is 64 bit large
-
-;###########################################################	
-
-.global sha256_ctx2hash
-; === sha256_ctx2hash ===
-; this function converts a state into a normal hash (bytestring)
-;  param1: the 16-bit destination pointer
-;	given in r25,r24 (r25 is most significant)
-;  param2: the 16-bit pointer to sha256_ctx structure
-;	given in r23,r22
-sha256_ctx2hash:
-	movw r26, r22
-	movw r30, r24
-	ldi r21, 8
-	sbiw r26, 4
-1:	
-	ldi r20, 4
-	adiw r26, 8
-2:	
-		ld r0, -X
-		st Z+, r0	
-	dec r20
-	brne 2b
-	
-	dec r21
-	brne 1b
-	
-	ret
-
-;###########################################################	
-
-.global sha256
-; === sha256 ===
-; this function calculates SHA-256 hashes from messages in RAM
-;  param1: the 16-bit hash destination pointer
-;	given in r25,r24 (r25 is most significant)
-;  param2: the 16-bit pointer to message
-;	given in r23,r22
-;  param3: 32-bit length value (length of message in bits)
-;   given in r21,r20,r19,r18
-sha256:
-sha256_prolog:
-	push r8
-	push r9
-	push r10
-	push r11
-	push r12
-	push r13
-	push r16
-	push r17
-	in r16, SPL
-	in r17, SPH
-	subi r16, 8*4+8 
-	sbci r17, 0	
-	in r0, SREG
-	cli
-	out SPL, r16
-	out SPH, r17
-	out SREG, r0
-	
-	push r25
-	push r24
-	inc r16
-	adc r17, r1
-	
-	movw r8, r18		/* backup of length*/
-	movw r10, r20
-	
-	movw r12, r22	/* backup pf msg-ptr */
-	
-	movw r24, r16
-	rcall sha256_init
-	/* if length >= 512 */
-1:
-	tst r11
-	brne 4f
-	tst r10
-	brne 4f
-	mov r19, r9
-	cpi r19, 0x02
-	brlo 4f
-	
-	movw r24, r16
-	movw r22, r12
-	rcall sha256_nextBlock
-	ldi r19, 0x64
-	add r22, r19
-	adc r23, r1
-	/* length -= 512 */
-	ldi r19, 0x02
-	sub r9, r19
-	sbc r10, r1
-	sbc r11, r1
-	rjmp 1b
-	
-4:
-	movw r24, r16
-	movw r22, r12
-	movw r20, r8
-	rcall sha256_lastBlock
-	
-	pop r24
-	pop r25
-	movw r22, r16
-	rcall sha256_ctx2hash	
-	
-sha256_epilog:
-	in r30, SPL
-	in r31, SPH
-	adiw r30, 8*4+8 	
-	in r0, SREG
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG, r0
-	pop r17
-	pop r16
-	pop r13
-	pop r12
-	pop r11
-	pop r10
-	pop r9
-	pop r8
-	ret
-
-;###########################################################	
-
-
-; block MUST NOT be larger than 64 bytes
-
-.global sha256_lastBlock
-; === sha256_lastBlock ===
-; this function does padding & Co. for calculating SHA-256 hashes
-;  param1: the 16-bit pointer to sha256_ctx structure
-;	given in r25,r24 (r25 is most significant)
-;  param2: an 16-bit pointer to 64 byte block to hash
-;	given in r23,r22
-;  param3: an 16-bit integer specifing length of block in bits
-;	given in r21,r20
-sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
-
-
-sha256_lastBlock:
-	cpi r21, 0x02
-	brlo sha256_lastBlock_prolog
-	push r25
-	push r24
-	push r23
-	push r22
-	push r21
-	push r20
-	rcall sha256_nextBlock
-	pop r20
-	pop r21
-	pop r22
-	pop r23
-	pop r24
-	pop r25
-	subi r21, 0x02
-	subi r23, -2
-	rjmp sha256_lastBlock	
-sha256_lastBlock_prolog:
-	/* allocate space on stack */
-	in r30, SPL
-	in r31, SPH
-	in r1, SREG
-	subi r30, lo8(64)
-	sbci r31, hi8(64)
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG,r1
-
-	adiw r30, 1 /* SP points to next free byte on stack */
-	mov r18, r20 /* r20 = LSB(length) */
-	lsr r18
-	lsr r18
-	lsr r18
-	bst r21, 0	/* may be we should explain this ... */
-	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
-	
-	
-	movw r26, r22 /* X points to begin of msg */
-	tst r18
-	breq sha256_lastBlock_post_copy
-	mov r1, r18
-sha256_lastBlock_copy_loop:
-	ld r0, X+
-	st Z+, r0
-	dec r1
-	brne sha256_lastBlock_copy_loop
-sha256_lastBlock_post_copy:	
-sha256_lastBlock_insert_stuffing_bit:	
-	ldi r19, 0x80
-	mov r0,r19 	
-	ldi r19, 0x07
-	and r19, r20 /* if we are in bitmode */
-	breq 2f	/* no bitmode */
-1:	
-	lsr r0
-	dec r19
-	brne 1b
-	ld r19, X
-/* maybe we should do some ANDing here, just for safety */
-	or r0, r19
-2:	
-	st Z+, r0
-	inc r18
-
-/* checking stuff here */
-	cpi r18, 64-8+1
-	brsh 0f 
-	rjmp sha256_lastBlock_insert_zeros
-0:
-	/* oh shit, we landed here */
-	/* first we have to fill it up with zeros */
-	ldi r19, 64
-	sub r19, r18
-	breq 2f
-1:	
-	st Z+, r1
-	dec r19
-	brne 1b	
-2:	
-	sbiw r30, 63
-	sbiw r30,  1
-	movw r22, r30
-	
-	push r31
-	push r30
-	push r25
-	push r24
-	push r21
-	push r20
-	rcall sha256_nextBlock
-	pop r20
-	pop r21
-	pop r24
-	pop r25
-	pop r30
-	pop r31
-	
-	/* now we should subtract 512 from length */
-	movw r26, r24
-	adiw r26, 4*8+1 /* we can skip the lowest byte */
-	ld r19, X
-	subi r19, hi8(512)
-	st X+, r19
-	ldi r18, 6
-1:
-	ld r19, X
-	sbci r19, 0
-	st X+, r19
-	dec r18
-	brne 1b
-	
-;	clr r18 /* not neccessary ;-) */
-	/* reset Z pointer to begin of block */
-
-sha256_lastBlock_insert_zeros:	
-	ldi r19, 64-8
-	sub r19, r18
-	breq sha256_lastBlock_insert_length
-	clr r1
-1:
-	st Z+, r1	/* r1 is still zero */
-	dec r19
-	brne 1b
-
-;	rjmp sha256_lastBlock_epilog
-sha256_lastBlock_insert_length:
-	movw r26, r24	/* X points to state */
-	adiw r26, 8*4	/* X points to (state.length) */
-	adiw r30, 8		/* Z points one after the last byte of block */
-	ld r0, X+
-	add r0, r20
-	st -Z, r0
-	ld r0, X+
-	adc r0, r21
-	st -Z, r0
-	ldi r19, 6
-1:
-	ld r0, X+
-	adc r0, r1
-	st -Z, r0
-	dec r19
-	brne 1b
-
-	sbiw r30, 64-8
-	movw r22, r30
-	rcall sha256_nextBlock
-
-sha256_lastBlock_epilog:
-	in r30, SPL
-	in r31, SPH
-	in r1, SREG
-	adiw r30, 63 ; lo8(64)
-	adiw r30,  1  ; hi8(64)
-	cli
-	out SPL, r30
-	out SPH, r31
-	out SREG,r1
-	clr r1
-	clr r0
-	ret
-
-/**/
-;###########################################################	
-
-.global sha256_nextBlock
-; === sha256_nextBlock ===
-; this is the core function for calculating SHA-256 hashes
-;  param1: the 16-bit pointer to sha256_ctx structure
-;	given in r25,r24 (r25 is most significant)
-;  param2: an 16-bit pointer to 64 byte block to hash
-;	given in r23,r22
-sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
-
-Bck1 = 12
-Bck2 = 13
-Bck3 = 14
-Bck4 = 15
-Func1 = 22
-Func2 = 23
-Func3 = 24
-Func4 = 25
-Accu1 = 16
-Accu2 = 17
-Accu3 = 18
-Accu4 = 19
-XAccu1 = 8
-XAccu2 = 9
-XAccu3 = 10
-XAccu4 = 11
-T1	= 4
-T2	= 5
-T3	= 6
-T4	= 7
-LoopC = 1
-/* byteorder: high number <--> high significance */
-sha256_nextBlock:
- ; initial, let's make some space ready for local vars
-	push r4 /* replace push & pop by mem ops? */
-	push r5
-	push r6
-	push r7
-	push r8
-	push r9
-	push r10
-	push r11
-	push r12
-	push r13
-	push r14
-	push r15
-	push r16
-	push r17
-	push r28
-	push r29
-	in r20, SPL
-	in r21, SPH
-	movw r18, r20			;backup SP
-;	movw r26, r20			; X points to free space on stack 
-	movw r30, r22			; Z points to message
-	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
-	sbci r21, hi8(sha256_nextBlock_localSpace)
-	movw r26, r20			; X points to free space on stack 
-	in r0, SREG
-	cli ; we want to be uninterrupted while updating SP
-	out SPL, r20
-	out SPH, r21
-	out SREG, r0
-	push r18
-	push r19
-	push r24
-	push r25 /* param1 will be needed later */
- ; now we fill the w array with message (think about endianess)
- 	adiw r26, 1 ; X++
- 	ldi r20, 16
-sha256_nextBlock_wcpyloop: 	
- 	ld r23, Z+
- 	ld r22, Z+
- 	ld r19, Z+
- 	ld r18, Z+
- 	st X+, r18
- 	st X+, r19
- 	st X+, r22	
-	st X+, r23
-	dec r20
-	brne sha256_nextBlock_wcpyloop
-/*	for (i=16; i<64; ++i){
-		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
-	} */
-	/* r25,r24,r23,r24 (r21,r20) are function values
-	   r19,r18,r17,r16 are the accumulator
-	   r15,r14,r13,rBck1 are backup1
-	   r11,r10,r9 ,r8  are xor accu   
-	   r1 is round counter 								*/
-
-	ldi r20, 64-16
-	mov LoopC, r20
-sha256_nextBlock_wcalcloop:		 
-	movw r30, r26 ; cp X to Z
-	sbiw r30, 63
-	sbiw r30, 1 		; substract 64 = 16*4
-	ld Accu1, Z+
-	ld Accu2, Z+
-	ld Accu3, Z+
-	ld Accu4, Z+ /* w[i] = w[i-16] */
-	ld Bck1, Z+
-	ld Bck2, Z+
-	ld Bck3, Z+
-	ld Bck4, Z+ /* backup = w[i-15] */
-	/* now sigma 0 */
-	mov Func1, Bck2
-	mov Func2, Bck3
-	mov Func3, Bck4
-	mov Func4, Bck1  /* prerotated by 8 */
-	ldi r20, 1
-	rcall bitrotl
-	movw XAccu1, Func1
-	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
-	movw Func1, Bck3
-	movw Func3, Bck1 /* prerotated by 16 */
-	ldi r20, 2
-	rcall bitrotr
-	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4
-	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
-sigma0_shr:
-	lsr Bck4
-	ror Bck3
-	ror Bck2
-	ror Bck1	
-	dec Func2
-	brne sigma0_shr
-	eor XAccu1, Bck1
-	eor XAccu2, Bck2
-	eor XAccu3, Bck3
-	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma1(w[i-15]) */
-	add Accu1, XAccu1
-	adc Accu2, XAccu2
-	adc Accu3, XAccu3
-	adc Accu4, XAccu4 /* finished with sigma0 */
-	ldd Func1, Z+7*4  /* now accu += w[i-7] */
-	ldd Func2, Z+7*4+1
-	ldd Func3, Z+7*4+2
-	ldd Func4, Z+7*4+3
-	add Accu1, Func1
-	adc Accu2, Func2
-	adc Accu3, Func3
-	adc Accu4, Func4
-	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
-	ldd Bck2, Z+12*4+1
-	ldd Bck3, Z+12*4+2
-	ldd Bck4, Z+12*4+3
-	/* now sigma 1 */
-	movw Func1, Bck3
-	movw Func3, Bck1 /* prerotated by 16 */
-	ldi r20, 1
-	rcall bitrotr
-	movw XAccu3, Func3
-	movw XAccu1, Func1	 /* store in ROTR(w[i-2], 17) xor accu */
-;	movw Func1, Bck3
-;	movw Func3, Bck1 /* prerotated by 16 */
-	ldi r20, 2
-	rcall bitrotr
-	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4
-	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
-sigma1_shr:
-	lsr Bck4
-	ror Bck3
-	ror Bck2	
-	dec Func2
-	brne sigma1_shr
-	eor XAccu1, Bck2
-	eor XAccu2, Bck3
-	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-15]) */
-	add Accu1, XAccu1
-	adc Accu2, XAccu2
-	adc Accu3, XAccu3
-	adc Accu4, XAccu4 /* finished with sigma0 */
-	/* now let's store the shit */
-	st X+, Accu1
-	st X+, Accu2
-	st X+, Accu3
-	st X+, Accu4
-	dec LoopC
-	breq 3f  ; skip if zero
-	rjmp sha256_nextBlock_wcalcloop
-3:
-	/* we are finished with w array X points one byte post w */
-/* init a array */
-	pop r31
-	pop r30
-	push r30
-	push r31
-	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
-init_a_array:	
-	ld r1, Z+
-	st X+, r1
-	dec r25
-	brne init_a_array
-	
-/* now the real fun begins */
-/* for (i=0; i<64; ++i){
-			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
-			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
-			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; 
-			a[4] += t1;
-			a[0] = t1 + t2;
-		} */
-	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
-	sbiw r26, 8*4  /* X still points at a[7]+1*/
-	movw r28, r26
-	ldi r30, lo8(sha256_kv)
-	ldi r31, hi8(sha256_kv)		
-	dec r27  /* X - (64*4 == 256) */
-	ldi r25, 64
-	mov LoopC, r25
-sha256_main_loop:
-	/* now calculate t1 */
-	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
-	ldd T1, Y+5*4
-	ldd T2, Y+5*4+1
-	ldd T3, Y+5*4+2
-	ldd T4, Y+5*4+3 /* y in T */
-	ldd Func1, Y+4*4
-	ldd Func2, Y+4*4+1
-	ldd Func3, Y+4*4+2
-	ldd Func4, Y+4*4+3  /* x in Func */
-	ldd Bck1, Y+6*4
-	ldd Bck2, Y+6*4+1
-	ldd Bck3, Y+6*4+2
-	ldd Bck4, Y+6*4+3 /* z in Bck */
-	and T1, Func1
-	and T2, Func2
-	and T3, Func3
-	and T4, Func4
-	com Func1
-	com Func2
-	com Func3
-	com Func4
-	and Bck1, Func1
-	and Bck2, Func2
-	and Bck3, Func3
-	and Bck4, Func4
-	eor T1, Bck1
-	eor T2, Bck2
-	eor T3, Bck3
-	eor T4, Bck4 /* done, CH(x,y,z) is in T */
-	/* now SIGMA1(a[4]) */
-	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
-	ldd Bck1, Y+4*4+1	
-	ldd Bck2, Y+4*4+2
-	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */	
-	movw Func1, Bck1
-	movw Func3, Bck3
-	ldi r20, 2 
-	rcall bitrotl		/* rotr(x,6) */ 
-	movw XAccu1, Func1
-	movw XAccu3, Func3
-	movw Func1, Bck1
-	movw Func3, Bck3
-	ldi r20, 3 
-	rcall bitrotr 	/* rotr(x,11) */
-	eor XAccu1, Func1
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4
-	movw Func1, Bck3 /* this prerotates furteh 16 bits*/
-	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
-	ldi r20, 1 
-	rcall bitrotr 	/* rotr(x,11) */
-	eor XAccu1, Func1
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
-	add T1, XAccu1
-	adc T2, XAccu2
-	adc T3, XAccu3
-	adc T4, XAccu4
-	/* now we've to add a[7], w[i] and k[i] */
-	ldd XAccu1, Y+4*7
-	ldd XAccu2, Y+4*7+1
-	ldd XAccu3, Y+4*7+2
-	ldd XAccu4, Y+4*7+3
-	add T1, XAccu1
-	adc T2, XAccu2
-	adc T3, XAccu3
-	adc T4, XAccu4 /* add a[7] */
-	ld XAccu1, X+
-	ld XAccu2, X+
-	ld XAccu3, X+
-	ld XAccu4, X+
-	add T1, XAccu1
-	adc T2, XAccu2
-	adc T3, XAccu3
-	adc T4, XAccu4 /* add w[i] */
-	lpm XAccu1, Z+
-	lpm XAccu2, Z+
-	lpm XAccu3, Z+
-	lpm XAccu4, Z+
-	add T1, XAccu1
-	adc T2, XAccu2
-	adc T3, XAccu3
-	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
-	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
-		/* starting with MAJ(x,y,z) */
-	ldd Func1, Y+4*0+0
-	ldd Func2, Y+4*0+1
-	ldd Func3, Y+4*0+2
-	ldd Func4, Y+4*0+3 /* load x=a[0] */
-	ldd XAccu1, Y+4*1+0
-	ldd XAccu2, Y+4*1+1
-	ldd XAccu3, Y+4*1+2
-	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
-	and XAccu1, Func1
-	and XAccu2, Func2
-	and XAccu3, Func3
-	and XAccu4, Func4	/* XAccu == (x & y) */
-	ldd Bck1, Y+4*2+0
-	ldd Bck2, Y+4*2+1
-	ldd Bck3, Y+4*2+2
-	ldd Bck4, Y+4*2+3 /* load z=a[2] */
-	and Func1, Bck1
-	and Func2, Bck2
-	and Func3, Bck3
-	and Func4, Bck4
-	eor XAccu1, Func1
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
-	ldd Func1, Y+4*1+0
-	ldd Func2, Y+4*1+1
-	ldd Func3, Y+4*1+2
-	ldd Func4, Y+4*1+3 /* load y=a[1] */
-	and Func1, Bck1
-	and Func2, Bck2
-	and Func3, Bck3
-	and Func4, Bck4
-	eor XAccu1, Func1
-	eor XAccu2, Func2
-	eor XAccu3, Func3
-	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
-   	/* SIGMA0(a[0]) */
-	ldd Bck1, Y+4*0+0 /* we should combine this with above */
-	ldd Bck2, Y+4*0+1
-	ldd Bck3, Y+4*0+2
-	ldd Bck4, Y+4*0+3
-	movw Func1, Bck1
-	movw Func3, Bck3
-	ldi r20, 2
-	rcall bitrotr
-	movw Accu1, Func1
-	movw Accu3, Func3 /* Accu = shr(a[0], 2) */
-	movw Func1, Bck3 
-	movw Func3, Bck1 /* prerotate by 16 bits */
-	ldi r20, 3
-	rcall bitrotl
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4 /* Accu ^= shr(a[0], 13) */
-	mov Func1, Bck4
-	mov Func2, Bck1
-	mov Func3, Bck2
-	mov Func4, Bck3  /* prerotate by 24 bits */
-	ldi r20, 2
-	rcall bitrotl
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4 /* Accu ^= shr(a[0], 22) */
-	add Accu1, XAccu1 /* add previous result (MAJ)*/
-	adc Accu2, XAccu2
-	adc Accu3, XAccu3
-	adc Accu4, XAccu4
-	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
-	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
-
-	ldi r21, 7*4
-	adiw r28, 7*4
-a_shift_loop:
-	ld  r25, -Y /* warning: this is PREdecrement */
-	std Y+4, r25
-	dec r21
-	brne a_shift_loop
-
-	ldd Bck1, Y+4*4+0
-	ldd Bck2, Y+4*4+1
-	ldd Bck3, Y+4*4+2
-	ldd Bck4, Y+4*4+3
-	add Bck1, T1
-	adc Bck2, T2
-	adc Bck3, T3
-	adc Bck4, T4
-	std Y+4*4+0, Bck1
-	std Y+4*4+1, Bck2
-	std Y+4*4+2, Bck3
-	std Y+4*4+3, Bck4
-	add Accu1, T1
-	adc Accu2, T2
-	adc Accu3, T3
-	adc Accu4, T4
-	std Y+4*0+0, Accu1
-	std Y+4*0+1, Accu2
-	std Y+4*0+2, Accu3
-	std Y+4*0+3, Accu4 /* a array updated */
-	
-	
-	dec LoopC
-	breq update_state
-	rjmp sha256_main_loop ;brne sha256_main_loop
-update_state:	
-	/* update state */
-	/* pointers to state should still exist on the stack ;-) */
-	pop r31
-	pop r30
-	ldi r21, 8
-update_state_loop:
-	ldd Accu1, Z+0
-	ldd Accu2, Z+1
-	ldd Accu3, Z+2
-	ldd Accu4, Z+3 
-	ld Func1, Y+
-	ld Func2, Y+
-	ld Func3, Y+
-	ld Func4, Y+
-	add Accu1, Func1
-	adc Accu2, Func2
-	adc Accu3, Func3
-	adc Accu4, Func4
-	st Z+, Accu1
-	st Z+, Accu2
-	st Z+, Accu3
-	st Z+, Accu4
-	dec r21
-	brne update_state_loop
-	/* now we just have to update the length */
-	adiw r30, 1 /* since we add 512, we can simply skip the LSB */ 
-	ldi r21, 2
-	ldi r22, 6
-	ld r20, Z
-	add r20, r21
-	st Z+, r20	
-	clr r21
-sha256_nextBlock_fix_length:	
-	brcc sha256_nextBlock_epilog
-	ld r20, Z
-	adc r20, r21
-	st Z+, r20
-	dec r22
-	brne sha256_nextBlock_fix_length
-	
-; EPILOG
-sha256_nextBlock_epilog:
-/* now we should clean up the stack */
-	
-	pop r21
-	pop r20
-	in r0, SREG
-	cli ; we want to be uninterrupted while updating SP
-	out SPL, r20
-	out SPH, r21
-	out SREG, r0
-	
-	clr r1
-	pop r29
-	pop r28
-	pop r17
-	pop r16
-	pop r15
-	pop r14
-	pop r13
-	pop r12
-	pop r11
-	pop r10
-	pop r9
-	pop r8
-	pop r7
-	pop r6
-	pop r5
-	pop r4 
-	ret
-
-sha256_kv: ; round-key-vector stored in ProgMem 
-.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
-.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
-.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
-.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
-.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
-.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
-.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
-.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
-
-	
-;###########################################################	
-
-.global sha256_init 
-;uint32_t sha256_init_vector[]={
-;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
-;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
-;
-;void sha256_init(sha256_ctx_t *state){
-;	state->length=0;
-;	memcpy(state->h, sha256_init_vector, 8*4);
-;}
-; param1: (r23,r24) 16-bit pointer to sha256_ctx_t struct in ram
-; modifys: Z(r30,r31), Func1, r22
-sha256_init:
-	movw r26, r24 ; (24,25) --> (26,27) load X with param1
-	ldi r30, lo8((sha256_init_vector))
-	ldi r31, hi8((sha256_init_vector))
-	ldi r22, 32+8
-sha256_init_vloop:	
-	lpm r23, Z+ 
-	st X+, r23
-	dec r22
-	brne sha256_init_vloop
-	ret
-	
-sha256_init_vector:
-.word 0xE667, 0x6A09
-.word 0xAE85, 0xBB67 
-.word 0xF372, 0x3C6E 
-.word 0xF53A, 0xA54F 
-.word 0x527F, 0x510E 
-.word 0x688C, 0x9B05 
-.word 0xD9AB, 0x1F83 
-.word 0xCD19, 0x5BE0
-.word 0x0000, 0x0000
-.word 0x0000, 0x0000
-
-;###########################################################	
-
-.global rotl32
-; === ROTL32 ===
-; function that rotates a 32 bit word to the left
-;  param1: the 32-bit word to rotate
-;	given in r25,r24,r23,r22 (r25 is most significant)
-;  param2: an 8-bit value telling how often to rotate
-;	given in r20
-; modifys: r21, r22
-rotl32:
-	cpi r20, 8
-	brlo bitrotl
-	mov r21, r25
-	mov r25, r24
-	mov r24, r23
-	mov r23, r22
-	mov r22, r21
-	subi r20, 8
-	rjmp rotl32
-bitrotl:
-	clr r21
-	clc
-bitrotl_loop:	
-	tst r20
-	breq fixrotl
-	rol r22
-	rol r23
-	rol r24
-	rol r25
-	rol r21
-	dec r20
-	rjmp bitrotl_loop
-fixrotl:
-	or r22, r21
-	ret
-	
-
-;###########################################################	
-
-.global rotr32
-; === ROTR32 ===
-; function that rotates a 32 bit word to the right
-;  param1: the 32-bit word to rotate
-;	given in r25,r24,r23,22 (r25 is most significant)
-;  param2: an 8-bit value telling how often to rotate
-;	given in r20
-; modifys: r21, r22
-rotr32:
-	cpi r20, 8
-	brlo bitrotr
-	mov r21, r22
-	mov r22, r23
-	mov r23, r24
-	mov r24, r25
-	mov r25, r21
-	subi r20, 8
-	rjmp rotr32
-bitrotr:
-	clr r21
-	clc
-bitrotr_loop:	
-	tst r20
-	breq fixrotr
-	ror r25
-	ror r24
-	ror r23
-	ror r22
-	ror r21
-	dec r20
-	rjmp bitrotr_loop
-fixrotr:
-	or r25, r21
-	ret
-	
-	
-;###########################################################	
-	
-.global change_endian32
-; === change_endian32 ===
-; function that changes the endianess of a 32-bit word
-;  param1: the 32-bit word
-;	given in r25,r24,r23,22 (r25 is most significant)
-;  modifys: r21, r22
-change_endian32:
-	movw r20,  r22 ; (r22,r23) --> (r20,r21)
-	mov r22, r25
-	mov r23, r24
-	mov r24, r21
-	mov r25, r20 
-	ret
-
diff --git a/sha256.c b/sha256.c
deleted file mode 100644
index f654968..0000000
--- a/sha256.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/* sha256.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file		sha256.c
- * \author		Daniel Otte 
- * \date		16.05.2006
- * 
- * \par License:	
- * 	GPL
- * 
- * \brief SHA-256 implementation.
- * 
- * 
- */
-
-#include <stdint.h>
-#include <string.h> /* for memcpy, memmove, memset */
-#include "sha256.h"
-
-#define LITTLE_ENDIAN
-
-#if defined LITTLE_ENDIAN
-#elif defined BIG_ENDIAN
-#else
-	#error specify endianess!!!
-#endif
-
-
-/*************************************************************************/
-
-uint32_t sha256_init_vector[]={
-	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
-    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
-
-
-/*************************************************************************/
-
-/**
- * \brief \c sh256_init initialises a sha256 context for hashing. 
- * \c sh256_init c initialises the given sha256 context for hashing
- * @param state pointer to a sha256 context
- * @return none
- */
-void sha256_init(sha256_ctx_t *state){
-	state->length=0;
-	memcpy(state->h, sha256_init_vector, 8*4);
-}
-
-/*************************************************************************/
-
-/**
- * rotate x right by n positions
- */
-uint32_t rotr32( uint32_t x, uint8_t n){
-	return ((x>>n) | (x<<(32-n)));
-}
-
-
-/*************************************************************************/
-
-// #define CHANGE_ENDIAN32(x) (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8))
-
-uint32_t change_endian32(uint32_t x){
-	return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
-}
-
-
-/*************************************************************************/
-
-/* sha256 functions as macros for speed and size, cause they are called only once */
-
-#define CH(x,y,z)  (((x)&(y)) ^ ((~(x))&(z)))
-#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z)))
-
-#define SIGMA0(x) (rotr32((x),2) ^ rotr32((x),13) ^ rotr32((x),22))
-#define SIGMA1(x) (rotr32((x),6) ^ rotr32((x),11) ^ rotr32((x),25))
-#define SIGMA_a(x) (rotr32((x),7)  ^ rotr32((x),18) ^ ((x)>>3))
-#define SIGMA_b(x) (rotr32((x),17) ^ rotr32((x),19) ^ ((x)>>10))
-
-
-uint32_t k[]={
-	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-/*************************************************************************/
-
-/**
- * block must be, 512, Bit = 64, Byte, long !!!
- */
-void sha256_nextBlock (sha256_ctx_t *state, const void* block){
-	uint32_t w[64];	/* this is 256, byte, large, */
-	uint8_t  i;
-	uint32_t a[8],t1,t2;
-
-	/* init w */
-#if defined LITTLE_ENDIAN
-		for (i=0; i<16; ++i){
-			w[i]= change_endian32(((uint32_t*)block)[i]);
-		}
-#elif defined BIG_ENDIAN
-		memcpy((void*)w, block, 64);
-#endif
-		for (i=16; i<64; ++i){
-			w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
-		}
-
-	/* init working variables */	
-		memcpy((void*)a,(void*)(state->h), 8*4);
-
-	/* do the, fun stuff, */
-		for (i=0; i<64; ++i){
-			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
-			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
-			memmove(&(a[1]), &(a[0]), 7*4); 	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
-			a[4] += t1;
-			a[0] = t1 + t2;
-		}
-
-	/* update, the, state, */
-		for (i=0; i<8; ++i){
-			state->h[i] += a[i];
-		}	
-		state->length += 512;
-} 
-
-
-/*************************************************************************/
-
-/**
- * \brief function to process the last block being hashed
- * @param state Pointer to the context in which this block should be processed.
- * @param block Pointer to the message wich should be hashed.
- * @param length is the length of only THIS block in BITS not in bytes!
- *  bits are big endian, meaning high bits come first.
- * 	if you have a message with bits at the end, the byte must be padded with zeros 
- */
-void sha256_lastBlock(sha256_ctx_t *state, const void* block, uint16_t length){
-	uint8_t lb[SHA256_BLOCK_BITS/8]; /* local block */
-	state->length += length;
-	memcpy (&(lb[0]), block, length/8);
-	
-	/* set the final one bit */
-	if (length & 0x7){ // if we have single bits at the end
-		lb[length/8] = ((uint8_t*)(block))[length/8];
-	} else {
-		lb[length/8] = 0;
-	}
-	lb[length/8] |= 0x80>>(length & 0x7);
-	length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/
-	/* pad with zeros */
-	if (length>64-8){ /* not enouth space for 64bit length value */
-		memset((void*)(&(lb[length])), 0, 64-length);
-		sha256_nextBlock(state, lb);
-		state->length -= 512;
-		length = 0;	
-	}
-	memset((void*)(&(lb[length])), 0, 56-length);
-	/* store the 64bit length value */
-#if defined LITTLE_ENDIAN
-	 	/* this is now rolled up */
-	uint8_t i; 	
-	for (i=1; i<=8; ++i){
-		lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
-	}
-#elif defined BIG_ENDIAN
-	*((uint64_t)&(lb[56])) = state->length;
-#endif
-	sha256_nextBlock(state, lb);
-}
-
-
-/*************************************************************************/
-
-/*
- * length in bits!
- */
-void sha256(sha256_hash_t *dest, const void* msg, uint32_t length){ /* length could be choosen longer but this is for ÂµC */
-	sha256_ctx_t s;
-	sha256_init(&s);
-	while(length >= SHA256_BLOCK_BITS){
-		sha256_nextBlock(&s, msg);
-		msg = (uint8_t*)msg + SHA256_BLOCK_BITS/8;
-		length -= SHA256_BLOCK_BITS;
-	}
-	sha256_lastBlock(&s, msg, length);
-	sha256_ctx2hash(dest,&s);
-}
-
-
-
-/*************************************************************************/
-
-void sha256_ctx2hash(sha256_hash_t *dest, const sha256_ctx_t *state){
-#if defined LITTLE_ENDIAN
-	uint8_t i;
-	for(i=0; i<8; ++i){
-		((uint32_t*)dest)[i] = change_endian32(state->h[i]);
-	}
-#elif BIG_ENDIAN
-	if (dest != state->h)
-		memcpy(dest, state->h, SHA256_HASH_BITS/8);
-#else
-# error unsupported endian type!
-#endif
-}
-
-
diff --git a/sha256.h b/sha256.h
deleted file mode 100644
index 24960a3..0000000
--- a/sha256.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* sha256.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	sha256.h
- * \author  Daniel Otte 
- * \date    2006-05-16
- * \license	GPLv3 or later
- * 
- */
-
-#ifndef SHA256_H_
-#define SHA256_H_
-
-#define __LITTLE_ENDIAN__
-
-
-#include <stdint.h>
-
-/** \def SHA256_HASH_BITS
- * defines the size of a SHA-256 hash value in bits
- */
-
-/** \def SHA256_HASH_BYTES
- * defines the size of a SHA-256 hash value in bytes
- */
-
-/** \def SHA256_BLOCK_BITS
- * defines the size of a SHA-256 input block in bits
- */
-
-/** \def SHA256_BLOCK_BYTES
- * defines the size of a SHA-256 input block in bytes
- */
-
-#define SHA256_HASH_BITS  256
-#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
-#define SHA256_BLOCK_BITS 512
-#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
-
-/** \typedef sha256_ctx_t
- * \brief SHA-256 context type
- * 
- * A variable of this type may hold the state of a SHA-256 hashing process
- */
-typedef struct {
-	uint32_t h[8];
-	uint64_t length;
-} sha256_ctx_t;
-
-/** \typedef sha256_hash_t
- * \brief SHA-256 hash value type
- * 
- * A variable of this type may hold the hash value produced by the
- * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
- */
-typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
-
-/** \fn void sha256_init(sha256_ctx_t *state)
- * \brief initialise a SHA-256 context
- * 
- * This function sets a ::sha256_ctx_t to the initial values for hashing.
- * \param state pointer to the SHA-256 hashing context
- */
-void sha256_init(sha256_ctx_t *state);
-
-/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
- * \brief update the context with a given block
- * 
- * This function updates the SHA-256 hash context by processing the given block
- * of fixed length.
- * \param state pointer to the SHA-256 hash context
- * \param block pointer to the block of fixed length (512 bit = 64 byte)
- */
-void sha256_nextBlock (sha256_ctx_t* state, const void* block);
-
-/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
- * \brief finalize the context with the given block 
- * 
- * This function finalizes the SHA-256 hash context by processing the given block
- * of variable length.
- * \param state pointer to the SHA-256 hash context
- * \param block pointer to the block of fixed length (512 bit = 64 byte)
- * \param length_b the length of the block in bits
- */
-void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
-
-/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
- * \brief convert the hash state into the hash value
- * This function reads the context and writes the hash value to the destination
- * \param dest pointer to the location where the hash value should be written
- * \param state pointer to the SHA-256 hash context
- */
-void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
-
-/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
- * \brief simple SHA-256 hashing function for direct hashing
- * 
- * This function automaticaly hashes a given message of arbitary length with
- * the SHA-256 hashing algorithm.
- * \param dest pointer to the location where the hash value is going to be written to
- * \param msg pointer to the message thats going to be hashed
- * \param length_b length of the message in bits
- */
-void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
-
-#endif /*SHA256_H_*/
diff --git a/sha256/sha256-asm.S b/sha256/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/sha256/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler	
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 (in this order), then zero r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1		/* r1 = 0 for the code that follows the macro */
+.endm
+
+.macro postcall
+	pop r31		/* restore r31 ... r0, the exact reverse of the pushes in precall */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27		/* debug helper: keep X across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc		/* external; presumably takes the character in r24 */
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26		/* r25:r24 = X */
+.if \length > 16
+	ldi r22, lo8(16)	/* r23:r22 = 16 */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump	/* external; presumably (pointer in r25:r24, count in r23:r22) */
+	pop r26
+	pop r27
+	adiw r26, 16		/* X += 16, then expand the macro again for the rest */
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* dump length bytes starting at X; precall/postcall save and restore r0, r1, r18-r27, r30, r31 around it */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22		; X = pointer to the context (param2)
+	movw r30, r24		; Z = destination (param1)
+	ldi r21, 8		; r21 = number of 32-bit words left
+	sbiw r26, 4		; bias X: the adiw below then lands one past the current word
+1:
+	ldi r20, 4		; r20 = number of bytes left in this word
+	adiw r26, 8		; X = one past the last byte of the next word
+2:
+		ld r0, -X	; read the word from its last byte down to its first
+		st Z+, r0	; and store front to back, i.e. with reversed byte order
+	dec r20
+	brne 2b
+
+	dec r21
+	brne 1b
+
+	ret
+
+;###########################################################	
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8		/* reserve room for a context (8*4 + 8 bytes) on the stack */
+	sbci r17, 0
+	in r0, SREG
+	cli			/* no interrupt may see a half-updated SP */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+
+	push r25		/* keep dest until sha256_ctx2hash is called */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = reserved SP + 1 = context; 16-bit add (inc sets no carry) */
+	sbci r17, hi8(-1)
+
+	movw r8, r18		/* backup of length in r11:r8 */
+	movw r10, r20
+
+	movw r12, r22		/* backup of msg-ptr in r13:r12 */
+
+	movw r24, r16
+	rcall sha256_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f			/* bits 24..31 set: a full block is left for sure */
+	tst r10
+	brne 2f			/* bits 16..23 set: a full block is left for sure */
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f			/* less than 512 bits left: finalize */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock	/* returns with r1 == 0, r23:r22 are clobbered */
+	ldi r19, 64
+	add r12, r19		/* advance the saved msg-ptr by one block (64 bytes) */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8		/* remaining length (< 512) in bits */
+	rcall sha256_lastBlock
+
+	pop r24			/* dest */
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash
+
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8		/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+;  param3: an 16-bit integer specifing length of block in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02		/* length >= 512 bits? then hash one full block first */
+	brlo sha256_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* block pointer += 64 bytes (one block, not 512 bytes) */
+	sbci r23, hi8(-64)
+	rjmp sha256_lastBlock
+sha256_lastBlock_prolog:
+	/* allocate a 64 byte local block on the stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG		/* r1 is used as scratch for SREG here */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* Z = SP+1 = first byte of the local block */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* length < 512 here, so bit 8 of length is its only bit above r20 ... */
+	bld r18, 5  /* ... and lands in bit 5: now r18 == length/8 (aka. length in bytes) */
+
+
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18		/* r1 = byte counter, it is zero again after the loop */
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:
+sha256_lastBlock_insert_stuffing_bit:
+	ldi r19, 0x80
+	mov r0,r19
+	ldi r19, 0x07
+	and r19, r20 /* r19 = length % 8; non-zero means we are in bitmode */
+	breq 2f	/* no bitmode */
+1:
+	lsr r0			/* r0 = 0x80 >> (length % 8) */
+	dec r19
+	brne 1b
+	ld r19, X		/* the partial message byte */
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:
+	st Z+, r0
+	inc r18			/* r18 = bytes used in the block, stuffing byte included */
+
+/* is there still room for the 8 byte length field? */
+	cpi r18, 64-8+1
+	brsh 0f
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no room for the length field: this block is hashed without it */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:
+	st Z+, r1		/* r1 is zero: it was the counter of the copy loop */
+	dec r19
+	brne 1b
+2:
+	sbiw r30, 63		/* Z -= 64: back to the begin of the block */
+	sbiw r30,  1
+	movw r22, r30
+
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+
+	/* sha256_nextBlock added 512 to the length in the context, take that back */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0		/* propagate the borrow (dec leaves the carry flag alone) */
+	st X+, r19
+	dec r18
+	brne 1b
+
+;	clr r18 /* not necessary, the loop above leaves r18 == 0 */
+	/* Z was restored by the pops above and points to the begin of the block */
+
+sha256_lastBlock_insert_zeros:
+	ldi r19, 64-8
+	sub r19, r18		/* r19 = number of zero bytes up to byte 56 */
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20		/* state.length + length of this block ... */
+	st -Z, r0		/* ... stored from the last byte backwards, i.e. most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1		/* r1 is zero, only the carry is added */
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8		/* Z = begin of the block */
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; release the 64 byte local block,
+	adiw r30,  1  ; done in two steps of 63 + 1
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1			/* r1 held SREG, make it zero again */
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+	; first save the registers used below, then make room for the local variables
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+	; X is loaded further down, once the local space has been subtracted
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X = new SP, one below the local space
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18			; old SP, popped again in the epilog
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+	; now we fill the w array with message (think about endianess)
+	adiw r26, 1 ; X++, X = w[0]
+	ldi r20, 16
+sha256_nextBlock_wcpyloop:
+	ld r23, Z+			; the four bytes of a message word ...
+	ld r22, Z+
+	ld r19, Z+
+	ld r18, Z+
+	st X+, r18			; ... are stored in reverse order
+	st X+, r19
+	st X+, r22
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+	} */
+	/* r25,r24,r23,r22 (Func4..Func1) are function values, r20 is the rotate count
+	   r19,r18,r17,r16 (Accu4..Accu1) are the accumulator
+	   r15,r14,r13,r12 (Bck4..Bck1) are backup1
+	   r11,r10,r9 ,r8  (XAccu4..XAccu1) are xor accu
+	   r1 (LoopC) is round counter 								*/
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; subtract 64 = 16*4, Z = w[i-16]
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now SIGMA_a(w[i-15]) */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl		/* right by 8, left by 1: ROTR 7 */
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == SIGMA_a(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with SIGMA_a */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] (Z points to w[i-14]) */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now SIGMA_b(w[i-2]) */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store ROTR(w[i-2], 17) in xor accu */
+	; Func still holds ROTR(w[i-2], 17), so no reload is needed:
+	; two more bits to the right give ROTR(w[i-2], 19)
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (shift by 2 and skip a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == SIGMA_b(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with SIGMA_b */
+	/* now store w[i] */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31		/* Z = pointer to the context (param1), ... */
+	pop r30
+	push r30	/* ... which stays on the stack for update_state */
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:
+	ld r1, Z+	/* r1 is free here, LoopC is reloaded before the main loop */
+	st X+, r1
+	dec r25
+	brne init_a_array
+
+/* now the 64 compression rounds */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z (lpm needs it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X was one past a[7], now X = a[0] */
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)
+	dec r27  /* X - (64*4 == 256), X = w[0] */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotated by 8 bits to the right */
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotl		/* right by 8, left by 2: rotr(x,6) */
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3
+	rcall bitrotr 	/* right by 8 + 3: rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates a further 16 bits */
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1
+	rcall bitrotr 	/* right by 24 + 1: rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we have to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl	/* right by 16, left by 3 */
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl	/* right by 24, left by 2 */
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25	/* move each byte up by one word, highest address first */
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1	/* a[4] += t1 */
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1	/* a[0] = t1 + t2 */
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+
+
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:
+	/* update state: h[i] += a[i] for all eight words */
+	/* the pointer to the state was pushed in the prolog and is still on the stack */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length, which follows the eight words */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */
+	ldi r21, 2
+	ldi r22, 6	/* at most six more bytes can take a carry */
+	ld r20, Z
+	add r20, r21	/* second byte += 2, i.e. length += 512 */
+	st Z+, r20
+	clr r21		/* clr leaves the carry flag untouched */
+sha256_nextBlock_fix_length:
+	brcc sha256_nextBlock_epilog	/* no carry left: done */
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+	
+; EPILOG
+sha256_nextBlock_epilog:
+/* release the local variables: restore the SP value saved in the prolog */
+
+	pop r21
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+
+	clr r1		; r1 was used as LoopC, make it zero again
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4
+	ret
+
+sha256_kv: ; round-key-vector stored in ProgMem 
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+	
+;###########################################################	
+
+.global sha256_init 
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8	; 32 bytes of initial hash values plus 8 zero bytes for the length
+sha256_init_vloop:
+	lpm r23, Z+	; sha256_init_vector is read from program memory
+	st X+, r23
+	dec r22
+	brne sha256_init_vloop
+	ret
+	
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67 
+.word 0xF372, 0x3C6E 
+.word 0xF53A, 0xA54F 
+.word 0x527F, 0x510E 
+.word 0x688C, 0x9B05 
+.word 0xD9AB, 0x1F83 
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000
+.word 0x0000, 0x0000
+
+;###########################################################	
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; result in r25,r24,r23,r22; modifies: r20, r21
+rotl32:
+	cpi r20, 8
+	brlo bitrotl		; less than 8 bits left: rotate bit by bit
+	mov r21, r25		; rotate left by a whole byte
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl:			; entry used by sha256_nextBlock, expects r20 < 8
+	clr r21
+	clc
+bitrotl_loop:
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21			; r21 collects the bits shifted out at the top
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21		; and they are put back in at the bottom
+	ret
+	
+
+;###########################################################	
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; result in r25,r24,r23,r22; modifies: r20, r21
+rotr32:
+	cpi r20, 8
+	brlo bitrotr		; less than 8 bits left: rotate bit by bit
+	mov r21, r22		; rotate right by a whole byte
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr:			; entry used by sha256_nextBlock, expects r20 < 8
+	clr r21
+	clc
+bitrotr_loop:
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21			; r21 collects the bits shifted out at the bottom
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21		; and they are put back in at the top
+	ret
+	
+	
+;###########################################################	
+	
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  result in r25,r24,r23,r22; modifies: r20, r21
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25	; the highest byte becomes the lowest
+	mov r23, r24
+	mov r24, r21	; r21 is the old r23
+	mov r25, r20	; r20 is the old r22
+	ret
+
diff --git a/sha256/sha256.c b/sha256/sha256.c
new file mode 100644
index 0000000..f654968
--- /dev/null
+++ b/sha256/sha256.c
@@ -0,0 +1,231 @@
+/* sha256.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file		sha256.c
+ * \author		Daniel Otte 
+ * \date		16.05.2006
+ * 
+ * \par License:	
+ * 	GPL
+ * 
+ * \brief SHA-256 implementation.
+ * 
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h> /* for memcpy, memmove, memset */
+#include "sha256.h"
+
+#define LITTLE_ENDIAN
+
+#if defined LITTLE_ENDIAN
+#elif defined BIG_ENDIAN
+#else
+	#error specify endianess!!!
+#endif
+
+
+/*************************************************************************/
+
+uint32_t sha256_init_vector[]={
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+
+
+/*************************************************************************/
+
+/**
+ * \brief \c sha256_init initialises a sha256 context for hashing.
+ * It sets the length to zero and copies sha256_init_vector into the hash words.
+ * @param state pointer to a sha256 context
+ * @return none
+ */
+void sha256_init(sha256_ctx_t *state){
+	state->length=0;
+	memcpy(state->h, sha256_init_vector, 8*4);
+}
+
+/*************************************************************************/
+
+/**
+ * rotate x right by n positions; n is taken modulo 32, so no shift by 32 can occur
+ */
+uint32_t rotr32( uint32_t x, uint8_t n){
+	return ((x>>(n&31)) | (x<<((32-n)&31)));
+}
+
+
+/*************************************************************************/
+
+/* reverse the byte order of a 32-bit word: byte 0 <-> byte 3, byte 1 <-> byte 2 */
+
+uint32_t change_endian32(uint32_t x){
+	return ((x>>24) | ((x>>8) & 0x0000ff00) | ((x<<8) & 0x00ff0000) | (x<<24));
+}
+
+
+/*************************************************************************/
+
+/* sha256 functions as macros for speed and size, cause they are called only once */
+
+#define CH(x,y,z)  (((x)&(y)) ^ ((~(x))&(z)))
+#define MAJ(x,y,z) (((x)&(y)) ^ ((x)&(z)) ^ ((y)&(z)))
+
+#define SIGMA0(x) (rotr32((x),2) ^ rotr32((x),13) ^ rotr32((x),22))
+#define SIGMA1(x) (rotr32((x),6) ^ rotr32((x),11) ^ rotr32((x),25))
+#define SIGMA_a(x) (rotr32((x),7)  ^ rotr32((x),18) ^ ((x)>>3))
+#define SIGMA_b(x) (rotr32((x),17) ^ rotr32((x),19) ^ ((x)>>10))
+
+
+uint32_t k[]={
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+
+/*************************************************************************/
+
+/**
+ * \brief process one full message block
+ * block must be exactly 512 bit (= 64 byte) long; it is read byte-wise
+ * as big-endian words, so it needs no particular alignment
+ */
+void sha256_nextBlock (sha256_ctx_t *state, const void* block){
+	uint32_t w[64];	/* message schedule, 256 byte large */
+	const uint8_t *p = (const uint8_t*)block;
+	uint8_t  i;
+	uint32_t a[8],t1,t2;
+
+	/* init w: the first 16 words are the block itself, most significant byte first */
+	for (i=0; i<16; ++i){
+		w[i] = ((uint32_t)p[4*i]<<24) | ((uint32_t)p[4*i+1]<<16)
+		     | ((uint32_t)p[4*i+2]<<8) | (uint32_t)p[4*i+3];
+	}
+	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
+	}
+
+	/* init working variables */
+	memcpy((void*)a,(const void*)(state->h), 8*4);
+
+	/* the 64 compression rounds */
+	for (i=0; i<64; ++i){
+		t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+		t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+		memmove(&(a[1]), &(a[0]), 7*4); 	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+		a[4] += t1;
+		a[0] = t1 + t2;
+	}
+
+	/* update the state */
+	for (i=0; i<8; ++i){
+		state->h[i] += a[i];
+	}
+	state->length += 512;
+}
+
+
+/*************************************************************************/
+
+/**
+ * \brief function to process the last block being hashed
+ * @param state Pointer to the context in which this block should be processed.
+ * @param block Pointer to the message which should be hashed.
+ * @param length is the length of only THIS block in BITS not in bytes!
+ *  bits are big endian, meaning high bits come first.
+ * 	if you have a message with bits at the end, the byte must be padded with zeros
+ *  If length is 512 or more, the leading full blocks are hashed first.
+ */
+void sha256_lastBlock(sha256_ctx_t *state, const void* block, uint16_t length){
+	uint8_t lb[SHA256_BLOCK_BITS/8]; /* local block */
+	uint8_t i;
+	/* lb holds a single block only, so full blocks are consumed directly */
+	while (length >= SHA256_BLOCK_BITS){
+		sha256_nextBlock(state, block);
+		block = (const uint8_t*)block + SHA256_BLOCK_BITS/8;
+		length -= SHA256_BLOCK_BITS;
+	}
+	state->length += length;
+	memcpy (&(lb[0]), block, length/8);
+
+	/* set the final one bit, behind the single bits at the end if there are any */
+	lb[length/8] = (length & 0x7) ? ((const uint8_t*)block)[length/8] : 0;
+	lb[length/8] |= 0x80>>(length & 0x7);
+	length =(length >> 3) + 1; /* from now on length contains the number of BYTES in lb*/
+
+	/* pad with zeros */
+	if (length>64-8){ /* not enough space for 64bit length value */
+		memset((void*)(&(lb[length])), 0, 64-length);
+		sha256_nextBlock(state, lb);
+		state->length -= 512; /* that block carried no additional message bits */
+		length = 0;
+	}
+	memset((void*)(&(lb[length])), 0, 56-length);
+	/* store the 64bit length value most significant byte first;
+	 * the shifts give the same bytes for any host byte order */
+	for (i=1; i<=8; ++i){
+		lb[55+i] = (uint8_t)(state->length>>(64- 8*i));
+	}
+	sha256_nextBlock(state, lb);
+}
+
+
+/*************************************************************************/
+
+/*
+ * length in bits!
+ */
+void sha256(sha256_hash_t *dest, const void* msg, uint32_t length){ /* length could be chosen longer but this is for microcontrollers */
+	sha256_ctx_t ctx;
+	const uint8_t *p = (const uint8_t*)msg;
+	sha256_init(&ctx);
+	for (; length >= SHA256_BLOCK_BITS; length -= SHA256_BLOCK_BITS){
+		sha256_nextBlock(&ctx, p);
+		p += SHA256_BLOCK_BITS/8;
+	}
+	sha256_lastBlock(&ctx, p, length);
+	sha256_ctx2hash(dest,&ctx);
+}
+
+
+
+/*************************************************************************/
+
+void sha256_ctx2hash(sha256_hash_t *dest, const sha256_ctx_t *state){
+	uint8_t *out = (uint8_t*)dest;
+	uint8_t i;
+	/* emit each state word most significant byte first; shifting instead of
+	 * writing through a uint32_t pointer gives the same bytes for any host
+	 * byte order and needs neither an endian switch nor an aligned dest */
+	for(i=0; i<8; ++i){
+		*out++ = (uint8_t)(state->h[i]>>24);
+		*out++ = (uint8_t)(state->h[i]>>16);
+		*out++ = (uint8_t)(state->h[i]>>8);
+		*out++ = (uint8_t)(state->h[i]);
+	}
+}
+
+
diff --git a/sha256/sha256.h b/sha256/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/sha256/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];
+	uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final block, whose length is given by length_b
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shabea.c b/shabea.c
deleted file mode 100644
index b59e138..0000000
--- a/shabea.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/* shabea.c */
-/*
- *   This file is part of AnonAccess, an access system which can be used
- *    to open door or doing other things with an anonymity featured
- *    account managment.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/**
- * \file	shabea.c
- * \author	Daniel Otte 
- * \date	2007-06-07
- * \brief	SHABEA - a SHA Based Encryption Algorithm implementation
- * \par License	
- * GPL
- * 
- * SHABEAn-r where n is the blocksize and r the number of round used
- * 
- * 
- */
-#include <stdlib.h>
-#include <string.h>
-#include "sha256.h"
-
-#include "config.h"
-#include "memxor.h"
-
-
-/*
- * SHABEA256-n
- */ 
- 
-#define SHABEA_BLOCKSIZE 256
-#define SHABEA_BLOCKSIZEB (SHABEA_BLOCKSIZE/8)
-#define SHABEA_HALFSIZEB  (SHABEA_BLOCKSIZEB/2)
-#define SHABEA_HALFSIZE (SHABEA_BLOCKSIZE/2)
-
-#define L ((uint8_t*)block+ 0)
-#define R ((uint8_t*)block+16)
-void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds){
-	uint8_t r;		/**/
-	uint8_t tb[SHABEA_HALFSIZEB+2+(keysize_b+7)/8];	/**/
-	uint16_t kbs;	/* bytes used for the key / temporary block */
-	sha256_hash_t hash;
-	uint8_t termcond; 
-	int8_t dir;
-	if(enc){
-		r = 0;
-		termcond = rounds-1;
-		dir = 1;
-	} else {
-		r = rounds-1;
-		termcond = 0;
-		dir = -1;
-	}
-	kbs = (keysize_b+7)/8;
-	memcpy(tb+SHABEA_HALFSIZEB+2, key, kbs); /* copy key to temporary block */
-	tb[SHABEA_HALFSIZEB+0] = 0;	/* set round counter high value to zero */
-	
-	for(;;r+=dir){ /* enc: 0..(rounds-1) ; !enc: (rounds-1)..0 */
-		memcpy(tb, R, SHABEA_HALFSIZEB); /* copy right half into tb */
-		tb[SHABEA_HALFSIZEB+1] = r;
-		sha256(&hash, tb, SHABEA_HALFSIZE+16+keysize_b);
-		if(r!=termcond){	
-			/* swap */
-			memxor(hash, L, SHABEA_HALFSIZEB);
-			memcpy(L, R, SHABEA_HALFSIZEB);
-			memcpy(R, hash, SHABEA_HALFSIZEB);
-		} else {
-			/* last round */
-			/* no swap */
-			memxor(L, hash, SHABEA_HALFSIZEB);
-			return;	
-		}
-	}
-}
-
-
diff --git a/shabea.h b/shabea.h
deleted file mode 100644
index fdb4916..0000000
--- a/shabea.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* shabea.h */
-/*
- *   This file is part of AnonAccess, an access system which can be used
- *    to open door or doing other things with an anonymity featured
- *    account managment.
- *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-/**
- * \file	shabea.h
- * \author	Daniel Otte 
- * \date	2007-06-07
- * \brief	SHABEA - a SHA Based Encryption Algorithm declarations
- * \par License	
- * GPL
- * 
- * SHABEAn-r where n is the blocksize and r the number of round used
- * 
- */
- 
-#ifndef SHABEA_H_
-#define SHABEA_H_
-
-void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds);
-#endif /*SHABEA_H_*/
diff --git a/shabea/memxor.S b/shabea/memxor.S
new file mode 100644
index 0000000..a32058b
--- /dev/null
+++ b/shabea/memxor.S
@@ -0,0 +1,66 @@
+/* memxor.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * File:        memxor.S
+ * Author:      Daniel Otte
+ * Date:        2008-08-07
+ * License:     GPLv3 or later
+ * Description: memxor, XORing one block into another
+ *
+ */
+
+/*
+ * void memxor(void* dest, const void* src, uint16_t n);
+ * XORs n bytes read from src into the n bytes at dest (dest[i] ^= src[i]).
+ * Nothing is read or written when n == 0.
+ *  param dest is passed in r24:r25
+ *  param src  is passed in r22:r23
+ *  param n    is passed in r20:r21
+ */
+.global memxor
+memxor:
+	movw r30, r24	/* Z = dest */
+	movw r26, r22	/* X = src */
+	movw r24, r20	/* r25:r24 = n, the remaining byte count */
+	adiw r24, 0	/* adding 0 only sets the Z flag for the n == 0 test */
+	breq 2f
+1:
+	ld r20, X+	/* r20 = next source byte */
+	ld r21, Z	/* r21 = current destination byte */
+	eor r20, r21
+	st Z+, r20	/* write the XOR back and advance dest */
+	sbiw r24, 1
+	brne 1b
+2:
+	ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/shabea/memxor.h b/shabea/memxor.h
new file mode 100644
index 0000000..a62a616
--- /dev/null
+++ b/shabea/memxor.h
@@ -0,0 +1,7 @@
+#ifndef MEMXOR_H_
+#define MEMXOR_H_
+#include <stdint.h>
+
+void memxor(void* dest, const void* src, uint16_t n);
+
+#endif
diff --git a/shabea/sha256-asm.S b/shabea/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/shabea/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler	
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 on the stack, then zero r1 (undone by postcall) */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1	/* presumably because called C code expects r1 == 0 (avr-gcc convention) - confirm */
+.endm
+
+.macro postcall
+	pop r31	/* restores, in reverse order, exactly the registers that precall pushed */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X (the address of the data to dump) across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc	/* external routine; presumably takes the character in r24 - confirm */
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* r25:r24 = address of the data */
+.if \length > 16
+	ldi r22, lo8(16)	/* more than 16 bytes left: dump one row of 16 ... */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump	/* external routine; presumably (pointer in r25:r24, byte count in r23:r22) - confirm */
+	pop r26
+	pop r27
+	adiw r26, 16
+	hexdump \length-16	/* ... and expand the macro again for the remaining bytes */
+.else
+	ldi r22, lo8(\length)	/* last row: dump the remaining \length bytes */
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* debug helper: X points to Block; expands hexdump for \length bytes, wrapped in precall/postcall */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring):
+; each of the eight 32-bit words h[0..7] is written out most significant byte first
+;  param1: the 16-bit destination pointer, given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure, given in r23,r22
+; modifies: r0, r20, r21, X, Z
+sha256_ctx2hash:
+	movw r26, r22	; X = state
+	movw r30, r24	; Z = dest
+	ldi r21, 8	; word counter
+	sbiw r26, 4	; bias X so that the adiw below lands one past the current word
+1:	
+	ldi r20, 4	; byte counter
+	adiw r26, 8	; X = one past the end of the next word
+2:	
+		ld r0, -X	; walk the word downwards (highest address first) ...
+		st Z+, r0	
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha256
+; === sha256 ===
+; this function calculates the SHA-256 hash of a complete message in RAM:
+; whole 64 byte blocks go through sha256_nextBlock, the remainder through sha256_lastBlock
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message, given in r23,r22
+;  param3: 32-bit length value (length of message in bits), given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8 	/* reserve a sha256_ctx_t (8*4+8 = 40 byte) on the stack */
+	sbci r17, 0	
+	in r0, SREG
+	cli			/* SP must not be observed half-updated by an interrupt */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25		/* keep the destination pointer for sha256_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = SP+1 = address of the ctx; a real 16-bit add, */
+	sbci r17, hi8(-1)	/* because inc does not produce a carry into the high byte */
+	
+	movw r8, r18		/* backup of length (r11:r10:r9:r8) */
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr (r13:r12) */
+	
+	movw r24, r16
+	rcall sha256_init
+	/* while length >= 512: hash one whole block */
+1:
+	tst r11
+	brne 2f			/* length >= 2^24 bits: certainly a whole block left */
+	tst r10
+	brne 2f			/* length >= 2^16 bits: certainly a whole block left */
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f			/* fewer than 512 bits left: finish with lastBlock */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock	/* preserves r8-r13, r16, r17 and returns with r1 == 0 */
+	ldi r19, 64		/* msg-ptr += 64 byte; kept in r13:r12 because r23:r22 */
+	add r12, r19		/* are clobbered by sha256_nextBlock */
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16		/* ctx */
+	movw r22, r12		/* first byte that has not been hashed yet */
+	movw r20, r8		/* remaining length in bits (now < 512) */
+	rcall sha256_lastBlock
+	
+	pop r24			/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash	
+	
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 	/* release the ctx */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; the data may be longer than 64 bytes: whole 64 byte blocks are hashed first
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the remaining message data
+;	given in r23,r22
+;  param3: a 16-bit integer specifying the length of that data in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02
+	brlo sha256_lastBlock_prolog	/* fewer than 512 bits left: go and pad */
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02			/* length -= 512 bits */
+	subi r22, lo8(-64)		/* data pointer += 64 byte (one block), */
+	sbci r23, hi8(-64)		/* not 512: the pointer counts bytes, not bits */
+	rjmp sha256_lastBlock	
+sha256_lastBlock_prolog:
+	/* allocate a 64 byte block buffer on the stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 is used as scratch here; it is zero again before it is needed as zero */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	adiw r30, 1 /* SP points to next free byte on stack, so the buffer starts at SP+1 */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* length < 512, so bit 8 of length is the only high bit ... */
+	bld r18, 5  /* ... and becomes bit 5 of length/8; now: r18 == length in whole bytes */
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop	/* leaves r1 == 0 */
+sha256_lastBlock_post_copy:	
+sha256_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* r19 = number of message bits in the trailing partial byte */
+	breq 2f	/* no bitmode: the stuffing byte is plain 0x80 */
+1:	
+	lsr r0
+	dec r19
+	brne 1b		/* r0 = 0x80 >> bits: the stuffing bit right behind the message bits */
+	ld r19, X	/* the partial message byte */
+	mov r26, r0	/* X is not needed any more, so r26 can serve as scratch */
+	lsl r26
+	neg r26		/* r26 = -(2*r0) = mask covering only the message bits */
+	and r19, r26	/* drop whatever the caller left in the unused low bits */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18
+/* does the 8 byte length field still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no: this block is zero-filled and hashed, the length goes into an extra block */
+	/* (r18 >= 57 here, so the copy loop has run and r1 is zero) */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63	/* Z back to the start of the buffer: 64 = 63 + 1, */
+	sbiw r30,  1	/* since sbiw only takes constants up to 63 */
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* sha256_nextBlock added 512 to state.length; take it off again (the real bit count is added below) */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0	/* propagate the borrow; dec below does not touch the carry flag */
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not neccessary ;-) */
+	/* r18 is 0 and Z points to the begin of the buffer again: an empty block to be zero-filled */
+
+sha256_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length	/* only taken with r18 == 56, where r1 is already zero */
+	clr r1
+1:
+	st Z+, r1	/* r1 is zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + length, stored most significant byte first */
+	st -Z, r0
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1	/* r1 is zero: only the carry is propagated */
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8	/* Z back to the start of the buffer */
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; release the 64 byte buffer: 64 = 63 + 1,
+	adiw r30,  1  ; since adiw only takes constants up to 63
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; core compression function: hashes one 64 byte block into the state and adds 512 to state.length
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18	/* old SP, popped again in the epilog to release the local space */
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ 	adiw r26, 1 ; X++
+ 	ldi r20, 16
+sha256_nextBlock_wcpyloop: 	
+ 	ld r23, Z+	/* the four bytes of a message word are read in memory order ... */
+ 	ld r22, Z+
+ 	ld r19, Z+
+ 	ld r18, Z+
+ 	st X+, r18	/* ... and stored in reverse order */
+ 	st X+, r19
+ 	st X+, r22	
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
+	} */
+	/* r25,r24,r23,r22 (Func4..Func1) are function values; r20 is the shift count, r21 is scratch
+	   r19,r18,r17,r16 (Accu4..Accu1) are the accumulator
+	   r15,r14,r13,r12 (Bck4..Bck1) are backup1
+	   r11,r10,r9 ,r8  (XAccu4..XAccu1) are xor accu
+	   r1 (LoopC) is round counter */
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:		 
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; substract 64 = 16*4
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl	/* right by 8, then left by 1: ROTR(w[i-15], 7) */
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1	
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store ROTR(w[i-2], 17) in xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr	/* Func still holds ROTR(w[i-2], 17); two more bits give ROTR(w[i-2], 19) */
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2	
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma1 */
+	/* now store the new w[i] */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31	/* fetch the state pointer (param1) into Z ... */
+	pop r30
+	push r30	/* ... and put it back, it is needed again in update_state */
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:	
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+	
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; 
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X pointed one past a[7]; now it points to a[0] */
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)		
+	dec r27  /* X - (64*4 == 256) */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1	
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */	
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2 
+	rcall bitrotl		/* rotr(x,6) */ 
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3 
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates further 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1 
+	rcall bitrotr 	/* rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+	movw Func1, Bck3 
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25	/* every byte moves up by 4, starting from the top so that nothing is overwritten early */
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0	/* a[4] += t1 */
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1	/* a[0] = t1 + t2 */
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+	
+	
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:	
+	/* update state: h[i] += a[i] for i = 0..7 */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3 
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */ 
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21
+	st Z+, r20	
+	clr r21		/* clr leaves the carry of the add above intact */
+sha256_nextBlock_fix_length:	
+	brcc sha256_nextBlock_epilog	/* no carry left: the higher length bytes stay as they are */
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+	
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+	
+	pop r21	/* the SP value saved in the prolog (high byte is popped first) */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1	/* r1 served as LoopC and as copy scratch; it is handed back as zero */
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4 
+	ret
+
+sha256_kv: ; the 64 round constants k[0..63], read with lpm; each 32-bit value is two .word entries, low half first
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+	
+;###########################################################	
+
+.global sha256_init 
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22 (Func1), r23 (Func2)
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 ; 32 byte of h[0..7] plus the 8 byte length, all taken from the table below
+sha256_init_vloop:	
+	lpm r23, Z+ 
+	st X+, r23
+	dec r22
+	brne sha256_init_vloop
+	ret
+	
+sha256_init_vector:
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67 
+.word 0xF372, 0x3C6E 
+.word 0xF53A, 0xA54F 
+.word 0x527F, 0x510E 
+.word 0x688C, 0x9B05 
+.word 0xD9AB, 0x1F83 
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000 ; the last four words are the initial value (zero) of the 64-bit length
+.word 0x0000, 0x0000
+
+;###########################################################	
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotl32:
+	cpi r20, 8
+	brlo bitrotl
+	mov r21, r25	; eight or more bits left: rotate by a whole byte first
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl:	; also entered directly by sha256_nextBlock, with r20 in 1..3
+	clr r21
+	clc
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21		; r21 collects the bits shifted out at the top
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21	; put the collected bits back in at the bottom
+	ret
+	
+
+;###########################################################	
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotr32:
+	cpi r20, 8
+	brlo bitrotr
+	mov r21, r22	; eight or more bits left: rotate by a whole byte first
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr:	; also entered directly by sha256_nextBlock, with r20 in 1..3
+	clr r21
+	clc
+bitrotr_loop:	
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21		; r21 collects the bits shifted out at the bottom
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21	; put the collected bits back in at the top
+	ret
+	
+	
+;###########################################################	
+	
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word (reverses its four bytes)
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  modifies: r20, r21; the byte-swapped word is left in r25,r24,r23,r22
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25
+	mov r23, r24
+	mov r24, r21
+	mov r25, r20 
+	ret
+
diff --git a/shabea/sha256.h b/shabea/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/shabea/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process
+ */
+typedef struct {
+	uint32_t h[8];
+	uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final message data (length_b bits; need not fill a whole 64 byte block)
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shabea/shabea.c b/shabea/shabea.c
new file mode 100644
index 0000000..b59e138
--- /dev/null
+++ b/shabea/shabea.c
@@ -0,0 +1,91 @@
+/* shabea.c */
+/*
+ *   This file is part of AnonAccess, an access system which can be used
+ *    to open door or doing other things with an anonymity featured
+ *    account managment.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * \file	shabea.c
+ * \author	Daniel Otte 
+ * \date	2007-06-07
+ * \brief	SHABEA - a SHA Based Encryption Algorithm implementation
+ * \par License	
+ * GPL
+ * 
+ * SHABEAn-r where n is the blocksize and r the number of round used
+ * 
+ * 
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "sha256.h"
+
+#include "config.h"
+#include "memxor.h"
+
+
+/*
+ * SHABEA256-n
+ */ 
+ 
+#define SHABEA_BLOCKSIZE 256
+#define SHABEA_BLOCKSIZEB (SHABEA_BLOCKSIZE/8)
+#define SHABEA_HALFSIZEB  (SHABEA_BLOCKSIZEB/2)
+#define SHABEA_HALFSIZE (SHABEA_BLOCKSIZE/2)
+
+#define L ((uint8_t*)block+ 0)
+#define R ((uint8_t*)block+16)
+void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds){
+	/* Feistel-style cipher on a 256 bit block; each round hashes (right half | round | key) with SHA-256 */
+	const uint16_t keybytes = (keysize_b+7)/8;	/* key length rounded up to whole bytes */
+	uint8_t tmp[SHABEA_HALFSIZEB+2+keybytes];	/* right half | 2 byte round counter | key */
+	uint8_t * const left  = (uint8_t*)block;	/* bytes  0..15 of the block */
+	uint8_t * const right = (uint8_t*)block+16;	/* bytes 16..31 of the block */
+	sha256_hash_t digest;
+	uint8_t round;		/* current round number; enc: 0..(rounds-1), !enc: (rounds-1)..0 */
+	uint8_t lastround;	/* round number after which we stop */
+	int8_t step;		/* +1 when encrypting, -1 when decrypting */
+	if(enc){
+		round = 0;
+		lastround = rounds-1;
+		step = 1;
+	} else {
+		round = rounds-1;
+		lastround = 0;
+		step = -1;
+	}
+	tmp[SHABEA_HALFSIZEB+0] = 0;	/* high byte of the round counter stays zero */
+	memcpy(tmp+SHABEA_HALFSIZEB+2, key, keybytes);	/* the key is the tail of every hash input */
+	for(;;){
+		memcpy(tmp, right, SHABEA_HALFSIZEB);	/* hash input starts with the right half */
+		tmp[SHABEA_HALFSIZEB+1] = round;
+		sha256(&digest, tmp, SHABEA_HALFSIZE+16+keysize_b);
+		if(round==lastround){
+			/* last round: combine the digest into the left half, no swap */
+			memxor(left, digest, SHABEA_HALFSIZEB);
+			return;
+		}
+		/* swap: right <- memxor(digest, left), left <- old right */
+		memxor(digest, left, SHABEA_HALFSIZEB);
+		memcpy(left, right, SHABEA_HALFSIZEB);
+		memcpy(right, digest, SHABEA_HALFSIZEB);
+		round += step;
+	}
+}
+
+
diff --git a/shabea/shabea.h b/shabea/shabea.h
new file mode 100644
index 0000000..fdb4916
--- /dev/null
+++ b/shabea/shabea.h
@@ -0,0 +1,39 @@
+/* shabea.h */
+/*
+ *   This file is part of AnonAccess, an access system which can be used
+ *    to open door or doing other things with an anonymity featured
+ *    account managment.
+ *   Copyright (C) 2006, 2007, 2008  Daniel Otte (daniel.otte@rub.de)
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * \file	shabea.h
+ * \author	Daniel Otte 
+ * \date	2007-06-07
+ * \brief	SHABEA - a SHA Based Encryption Algorithm declarations
+ * \par License	
+ * GPL
+ * 
+ * SHABEAn-r where n is the blocksize and r the number of round used
+ * 
+ */
+ 
+#ifndef SHABEA_H_
+#define SHABEA_H_
+#include <stdint.h> /* uint8_t / uint16_t are used in the prototype below */
+void shabea256(void * block, void * key, uint16_t keysize_b, uint8_t enc, uint8_t rounds); /* en-/decrypts (enc!=0 / enc==0) the 256 bit block in place; keysize_b is the key length in bits */
+#endif /*SHABEA_H_*/
diff --git a/shacal1/sha1-asm.S b/shacal1/sha1-asm.S
new file mode 100644
index 0000000..f571685
--- /dev/null
+++ b/shacal1/sha1-asm.S
@@ -0,0 +1,886 @@
+/* sha1-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; SHA1 implementation in assembler for AVR
+SHA1_BLOCK_BITS = 512
+SHA1_HASH_BITS = 160
+
+.macro precall
+	/* save r0, r1, r18 - r27 and r30 - r31 on the stack, then clear r1 */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1	/* presumably because the code called afterwards expects r1 == 0 */
+.endm
+
+.macro postcall
+	pop r31	/* restore everything precall pushed, in reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27	/* keep X across the two uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25	/* character goes to r24 */
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26	/* r25:r24 = X, presumably the address argument of uart_hexdump */
+.if \length > 16
+	ldi r22, lo8(16)	/* r23:r22 = 16, presumably the byte count argument */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16	/* advance X; the macro then expands itself for the remaining bytes */
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length)
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+.macro delay
+/* nested busy-wait loop, currently disabled (the macro expands to nothing):
+	push r0
+	push r1
+	clr r0
+1:	clr r1
+2:	dec r1
+	brne 2b
+	dec r0
+	brne 1b
+	pop r1
+	pop r0  // */
+.endm
+
+/* debug helper: X points to the block to dump, length is the number of bytes */
+.macro dbg_hexdump length
+/* debugging output is disabled here (the macro expands to nothing):
+	precall
+	hexdump \length
+	postcall
+	// */
+.endm
+
+
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha1_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha1_ctx2hash
+; === sha1_ctx2hash ===
+; this function converts a state into a normal hash (bytestring)
+;  param1: the 16-bit destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha1_ctx structure
+;	given in r23,r22
+sha1_ctx2hash:
+	movw r26, r22	/* X = pointer to the context (param2) */
+	movw r30, r24	/* Z = destination (param1) */
+	ldi r21, 5	/* five 32-bit words h0..h4 */
+	sbiw r26, 4	/* bias X so that the adiw below lands one past the current word */
+1:	
+	ldi r20, 4	/* four bytes per word */
+	adiw r26, 8	/* X = one past the last byte of the next word */
+2:	
+		ld r0, -X	/* read the word from its last byte downwards ... */
+		st Z+, r0	/* ... and store upwards: the byte order of each word is reversed */
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha1
+; === sha1 ===
+; this function calculates SHA-1 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha1:
+sha1_prolog:
+	push r8		/* r8..r11: backup of the 32 bit length */
+	push r9
+	push r10
+	push r11
+	push r12	/* r13:r12: current message pointer */
+	push r13
+	push r16	/* r17:r16: pointer to the context on the stack */
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 5*4+8 	/* reserve room for a sha1_ctx_t (5 words + 64 bit length) */
+	sbci r17, 0	
+	in r0, SREG
+	cli		/* SP must be updated without being interrupted */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25	/* keep the destination pointer for sha1_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = first byte of the context; 16 bit add, since "inc" */
+	sbci r17, hi8(-1)	/* does not set carry and lost the overflow into the high byte */
+	
+	movw r8, r18		/* backup of length*/
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr */
+	
+	movw r24, r16
+	rcall sha1_init
+	/* while length >= 512: hash one full block */
+1:
+	tst r11
+	brne 2f		/* bits 24..31 set: at least one full block left */
+	tst r10
+	brne 2f		/* bits 16..23 set: at least one full block left */
+	mov r19, r9	/* cpi needs a register >= r16 */
+	cpi r19, 0x02
+	brlo 4f		/* fewer than 512 bits left: go and finish */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha1_nextBlock
+	ldi r19, 64	/* advance the saved message pointer by one block (64 bytes); */
+	add r12, r19	/* r23:r22 is scratch and gets reloaded from r13:r12 */
+	adc r13, r1	/* r1 is zero (cleared by sha1_nextBlock) */
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16	/* param1: context */
+	movw r22, r12	/* param2: rest of the message */
+	movw r20, r8	/* param3: remaining length in bits (< 512) */
+	rcall sha1_lastBlock
+	
+	pop r24		/* destination pointer saved in the prolog */
+	pop r25
+	movw r22, r16
+	rcall sha1_ctx2hash	
+	
+sha1_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 5*4+8 	/* release the context */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; block MUST NOT be larger than 64 bytes
+
+.global sha1_lastBlock
+; === sha1_lastBlock ===
+; this function does padding & Co. for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+;  param3: an 16-bit integer specifing length of block in bits
+;	given in r21,r20
+sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)
+
+
+sha1_lastBlock:
+	cpi r21, 0x02	/* length >= 512 bits? then hash a full block first */
+	brlo sha1_lastBlock_prolog
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 2		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* block pointer += 64 bytes (one block), */
+	sbci r23, hi8(-64)	/* not 512 bytes as "subi r23, -2" did */
+	rjmp sha1_lastBlock
+sha1_lastBlock_prolog:
+	/* allocate a 64 byte buffer on the stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 is used as scratch for SREG here */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* Z = SP+1: first byte of the buffer (SP points to the next free byte) */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* length < 512, so only bit 8 of the bit length is left in r21 ... */
+	bld r18, 5  /* ... it becomes bit 5; now: r18 == length/8 (aka. length in bytes) */
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha1_lastBlock_post_copy
+	mov r1, r18	/* r1 counts down and is zero again after the loop */
+sha1_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha1_lastBlock_copy_loop
+sha1_lastBlock_post_copy:	
+sha1_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X	/* partial last byte of the message */
+/* maybe we should do some ANDing here, just for safety */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18		/* r18 = number of bytes in the buffer so far */
+
+/* does the 8 byte length field still fit into this block? */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha1_lastBlock_insert_zeros
+0:
+	/* it does not fit: a second block is needed */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1	/* r1 is zero here: the copy loop ran at least 56 times */
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63	/* Z back to the begin of the buffer (sbiw is limited to 63) */
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha1_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* sha1_nextBlock added 512 to the length; subtract it again */
+	movw r26, r24
+	adiw r26, 4*5+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not necessary: the loop above left r18 == 0 */
+	/* Z was restored by the pops above and points to the begin of the buffer again */
+
+sha1_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18	/* number of zero bytes up to the length field */
+	breq sha1_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha1_lastBlock_epilog
+sha1_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 5*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + remaining bits, ... */
+	st -Z, r0	/* ... stored backwards, i.e. most significant byte first */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1	/* propagate the carry (dec does not change it) */
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8	/* Z back to the begin of the buffer */
+	movw r22, r30
+	rcall sha1_nextBlock
+
+sha1_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 /* release the 64 byte buffer (adiw is limited to 63) */
+	adiw r30,  1
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1		/* r1 was used as scratch, make it zero again */
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha1_nextBlock
+; === sha1_nextBlock ===
+; this is the core function for calculating SHA-1 hashes
+;  param1: the 16-bit pointer to sha1_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: an 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for w array, 5 32-bit values for a array, plus one spare word (total 88 byte)
+
+xtmp = 0	/* r0: index into xTable / KTable / JumpTable */
+xNULL = 1	/* r1: used as constant zero */
+W1 = 10		/* r11:r10: pointer to w[0] */
+W2 = 11
+T1	= 12	/* r15..r12: 32 bit accumulator T */
+T2	= 13
+T3	= 14
+T4	= 15
+LoopC = 16	/* round counter */
+S	  = 17	/* byte offset of w[s] */
+tmp1 = 18
+tmp2 = 19
+tmp3 = 20
+tmp4 = 21
+F1 = 22		/* r25..r22: second 32 bit work value */
+F2 = 23
+F3 = 24
+F4 = 25
+
+/* byteorder: high number <--> high significance */
+sha1_nextBlock:
+ ; first make some room on the stack for local variables
+ 			 /* replace push & pop by mem ops? */
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack /* maybe removeable? */ 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha1_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	push r18
+	push r19 /* push old SP on new stack */
+	push r24
+	push r25 /* param1 will be needed later */
+	
+	/* load a[] with state */
+	movw 28, r24 /* load pointer to state in Y */
+	adiw r26, 1 ; X++
+
+	ldi LoopC, 5*4	
+1:	ld tmp1, Y+
+	st X+, tmp1
+	dec LoopC
+	brne 1b
+
+	movw W1, r26 /* save pointer to w[0] (w[] follows a[] directly) */
+	/* load w[] with endian fixed message */
+		/* we might also use the changeendian32() function at bottom */
+	movw r30, r22 /* mv param2 (pointer to msg) to Z */	
+	ldi LoopC, 16
+1:
+	ldd tmp1, Z+3	/* byte-swap each 32 bit word while copying */
+	st X+, tmp1
+	ldd tmp1, Z+2
+	st X+, tmp1
+	ldd tmp1, Z+1
+	st X+, tmp1
+	ld tmp1, Z
+	st X+, tmp1
+	adiw r30, 4
+	dec LoopC
+	brne 1b
+	
+	;clr LoopC /* not needed: the loop above left LoopC == 0; LoopC is named t in FIPS 180-2 */	
+	clr xtmp
+sha1_nextBlock_mainloop:
+	mov S, LoopC
+	lsl S
+	lsl S
+	andi S, 0x3C /* S is a bytepointer so *4; w[] is used as a ring of 16 words */
+	/* load w[s] */
+	movw r26, W1
+	add r26, S /* X points at w[s] */
+	adc r27, xNULL
+	ld T1, X+
+	ld T2, X+
+	ld T3, X+
+	ld T4, X+
+
+	/* debug hook: puts T on the stack and lets X point at it for dbg_hexdump */
+	push r26
+	push r27
+	push T4
+	push T3
+	push T2
+	push T1
+	in r26, SPL
+	in r27, SPH
+	adiw r26, 1
+	dbg_hexdump 4
+	pop T1
+	pop T2
+	pop T3
+	pop T4
+	pop r27
+	pop r26
+	/**/
+
+	cpi LoopC, 16
+	brlt sha1_nextBlock_mainloop_core	/* rounds 0..15 use the message words as they are */
+	/* update w[s]: T ^= w[s+2] ^ w[s+8] ^ w[s+13] (indices mod 16) */
+	ldi tmp1, 2*4
+	rcall 1f
+	ldi tmp1, 8*4
+	rcall 1f
+	ldi tmp1, 13*4
+	rcall 1f
+	rjmp 2f
+1:		/* local subroutine: T ^= w[(S+tmp1) mod 64]; this might be "outsourced" to save the jump above */
+	add tmp1, S
+	andi tmp1, 0x3f
+	movw r26, W1
+	add r26, tmp1
+	adc r27, xNULL
+	ld tmp2, X+
+	eor T1, tmp2
+	ld tmp2, X+
+	eor T2, tmp2
+	ld tmp2, X+
+	eor T3, tmp2
+	ld tmp2, X+
+	eor T4, tmp2
+	ret
+2:	/* now we just have to do a ROTL(T) and save T back */
+	mov tmp2, T4
+	rol tmp2	/* carry = top bit of T, which rotates into bit 0 */
+	rol T1
+	rol T2
+	rol T3
+	rol T4
+	movw r26, W1
+	add r26, S
+	adc r27, xNULL
+	st X+, T1
+	st X+, T2
+	st X+, T3
+	st X+, T4
+	
+sha1_nextBlock_mainloop_core:	/* the core function; T=ROTL5(a) ....*/	
+								/* T already contains w[s] */
+	movw r26, W1
+	sbiw r26, 4*1		/* X points at a[4] aka e */
+	ld tmp1, X+ 
+	add T1, tmp1
+	ld tmp1, X+ 
+	adc T2, tmp1
+	ld tmp1, X+ 
+	adc T3, tmp1
+	ld tmp1, X+ 
+	adc T4, tmp1		/* T = w[s]+e */
+	sbiw r26, 4*5		/* X points at a[0] aka a */
+	ld F1, X+ 
+	ld F2, X+ 
+	ld F3, X+ 
+	ld F4, X+ 
+	mov tmp1, F4		/* X points at a[1] aka b */
+	ldi tmp2, 5
+1:
+	rol tmp1		/* tmp1 feeds the top bits of the original F4 into the carry */
+	rol F1
+	rol F2
+	rol F3
+	rol F4
+	dec tmp2
+	brne 1b
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */
+	
+	/* now select the constant and function for this round: xtmp is bumped at rounds 20, 40 and 60 */
+	ldi r30, lo8(sha1_nextBlock_xTable)
+	ldi r31, hi8(sha1_nextBlock_xTable)
+	add r30, xtmp
+	adc r31, xNULL
+	lpm tmp1, Z
+	cp tmp1, LoopC
+	brne 1f
+	inc xtmp
+1:	ldi r30, lo8(sha1_nextBlock_KTable)
+	ldi r31, hi8(sha1_nextBlock_KTable)
+	lsl xtmp	/* temporarily xtmp*4: byte offset into KTable */
+	lsl xtmp
+	add r30, xtmp
+	adc r31, xNULL
+	lsr xtmp
+	lsr xtmp
+	 
+	lpm tmp1, Z+
+	add T1, tmp1
+	lpm tmp1, Z+
+	adc T2, tmp1
+	lpm tmp1, Z+
+	adc T3, tmp1
+	lpm tmp1, Z+
+	adc T4, tmp1
+			/* T = ROTL(a,5) + e + kt + w[s] */
+	
+	/* Z-4 is just pointing to kt ... */
+	movw r28, r26 /* copy X in Y */
+	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
+	lsr r31		/* icall needs a word address */
+	ror r30
+		
+	icall		/* one call per byte of f_t(b,c,d); result in tmp1, Y advances by one */
+	mov F1, tmp1
+	icall
+	mov F2, tmp1
+	icall
+	mov F3, tmp1
+	icall
+	
+	add T1, F1
+	adc T2, F2
+	adc T3, F3
+	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
+				 /* X points still at a[1] aka b, Y points at a[2] aka c */	
+	/* update a[] */
+sha1_nextBlock_update_a:
+	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
+	//adiw r28, 3*4  /* Y should point at a[4] aka e */
+	movw r28, W1
+	sbiw r28, 4
+	
+	ldi tmp2, 4*4 
+1:	
+	ld tmp1, -Y	/* copy from the top down so no byte is overwritten before it is moved */
+	std Y+4, tmp1
+	dec tmp2
+	brne 1b
+	/* Y points at a[0] aka a*/
+	
+	movw r28, W1
+	sbiw r28, 5*4
+	/* store T in a[0] aka a */
+	st Y+, T1
+	st Y+, T2
+	st Y+, T3
+	st Y+, T4
+	/* Y points at a[1] aka b*/
+	
+	/* rotate c right by 2 (same as ROTL 30) */
+	ldd T1, Y+1*4
+	ldd T2, Y+1*4+1
+	ldd T3, Y+1*4+2
+	ldd T4, Y+1*4+3
+	mov tmp1, T1
+	ldi tmp2, 2
+1:	ror tmp1	/* carry = bit 0 of c, which rotates into the top bit */
+	ror T4
+	ror T3
+	ror T2
+	ror T1
+	dec tmp2
+	brne 1b
+	std Y+1*4+0, T1
+	std Y+1*4+1, T2
+	std Y+1*4+2, T3
+	std Y+1*4+3, T4
+	
+	push r27	/* debug hook: X = a[] for dbg_hexdump */
+	push r26
+	movw r26, W1
+	sbiw r26, 4*5
+	dbg_hexdump 4*5
+	pop r26
+	pop r27
+	
+	inc LoopC
+	cpi LoopC, 80
+	brge 1f
+	rjmp sha1_nextBlock_mainloop
+/**************************************/
+1:	
+   /* Y points at a[1] here, step back to a[0] */
+	sbiw r28, 4
+
+/* add a[] to state and inc length */	
+	pop r27
+	pop r26		/* now X points to state (and Y still at a[0]) */
+	ldi tmp4, 5
+1:	clc		/* each 32 bit word is added separately */
+	ldi tmp3, 4
+2:	ld tmp1, X
+	ld tmp2, Y+
+	adc tmp1, tmp2
+	st X+, tmp1
+	dec tmp3
+	brne 2b
+	dec tmp4
+	brne 1b
+	
+	/* now length += 512 */
+	adiw r26, 1 /* we skip the least significant byte */
+	ld tmp1, X
+	ldi tmp2, hi8(512) /* 2 */
+	add tmp1, tmp2
+	st X+, tmp1
+	ldi tmp2, 6
+1:
+	ld tmp1, X
+	adc tmp1, xNULL
+	st X+, tmp1
+	dec tmp2
+	brne 1b
+	
+; EPILOG
+sha1_nextBlock_epilog:
+/* now we should clean up the stack */
+	pop r21		/* old SP, pushed in the prolog */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	ret
+
+sha1_nextBlock_xTable:
+.byte 20,40,60,0	/* rounds at which xtmp is incremented */
+sha1_nextBlock_KTable:
+.int	0x5a827999 	/* round constants, indexed by xtmp */
+.int	0x6ed9eba1 
+.int	0x8f1bbcdc 
+.int	0xca62c1d6
+sha1_nextBlock_JumpTable:
+rjmp sha1_nextBlock_Ch	/* 4 byte entries (rjmp + nop), reached via KTable address + 16 */
+	nop	
+rjmp sha1_nextBlock_Parity
+	nop
+rjmp sha1_nextBlock_Maj
+	nop
+rjmp sha1_nextBlock_Parity
+
+	 /* bytewise f_t helpers: Y points at the current byte of a[1] aka b and is advanced by one; return value in tmp1 */
+sha1_nextBlock_Ch:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	com tmp2
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp3, Y+7	/* load from d */
+	and tmp2, tmp3
+	eor tmp1, tmp2	/* (b and c) xor ((not b) and d) */
+	ret
+	
+sha1_nextBlock_Maj:
+	ld tmp1, Y+
+	mov tmp2, tmp1
+	ldd tmp3, Y+3	/* load from c */
+	and tmp1, tmp3
+	ldd tmp4, Y+7	/* load from d */
+	and tmp2, tmp4
+	eor tmp1, tmp2
+	and tmp3, tmp4
+	eor tmp1, tmp3	/* (b and c) xor (b and d) xor (c and d) */
+	ret
+
+sha1_nextBlock_Parity:
+	ld tmp1, Y+
+	ldd tmp2, Y+3	/* load from c */
+	eor tmp1, tmp2
+	ldd tmp2, Y+7	/* load from d */
+	eor tmp1, tmp2	/* b xor c xor d */
+	ret
+/*	
+ch_str:			.asciz "\r\nCh"
+maj_str:		.asciz "\r\nMaj"
+parity_str:	.asciz "\r\nParity"
+*/
+;###########################################################	
+
+.global sha1_init 
+;void sha1_init(sha1_ctx_t *state){
+;	DEBUG_S("\r\nSHA1_INIT");
+;	state->h[0] = 0x67452301;
+;	state->h[1] = 0xefcdab89;
+;	state->h[2] = 0x98badcfe;
+;	state->h[3] = 0x10325476;
+;	state->h[4] = 0xc3d2e1f0;
+;	state->length = 0;
+;}
+; param1: (r25,r24) 16-bit pointer to sha1_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha1_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha1_init_vector))
+	ldi r31, hi8((sha1_init_vector))
+	ldi r22, 5*4 /* bytes to copy */
+sha1_init_vloop:	
+	lpm r23, Z+ 	/* the init vector is read from program memory */
+	st X+, r23
+	dec r22
+	brne sha1_init_vloop
+	ldi r22, 8	/* then clear the 8 byte length field */
+sha1_init_lloop:
+	st X+, r1	/* relies on r1 being zero on entry */
+	dec r22
+	brne sha1_init_lloop
+	ret
+	
+sha1_init_vector:
+.int 0x67452301;
+.int 0xefcdab89;
+.int 0x98badcfe;
+.int 0x10325476;
+.int 0xc3d2e1f0;
+
diff --git a/shacal1/sha1.h b/shacal1/sha1.h
new file mode 100644
index 0000000..6675d20
--- /dev/null
+++ b/shacal1/sha1.h
@@ -0,0 +1,117 @@
+/* sha1.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha1.h
+ * \author	Daniel Otte
+ * \email   daniel.otte@rub.de
+ * \date	2006-10-08
+ * \license GPLv3 or later
+ * \brief   SHA-1 declaration.
+ * \ingroup SHA-1
+ * 
+ */
+ 
+#ifndef SHA1_H_
+#define SHA1_H_
+
+#include <stdint.h>
+/** \def SHA1_HASH_BITS
+ * defines the size of a SHA-1 hash in bits 
+ */
+
+/** \def SHA1_HASH_BYTES
+ * defines the size of a SHA-1 hash in bytes 
+ */
+
+/** \def SHA1_BLOCK_BITS
+ * defines the size of a SHA-1 input block in bits 
+ */
+
+/** \def SHA1_BLOCK_BYTES
+ * defines the size of a SHA-1 input block in bytes 
+ */
+#define SHA1_HASH_BITS  160
+#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
+#define SHA1_BLOCK_BITS 512
+#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)
+
+/** \typedef sha1_ctx_t
+ * \brief SHA-1 context type
+ * 
+ * A variable of this type may hold the state of a SHA-1 hashing process
+ */
+typedef struct {
+	uint32_t h[5];		/* the five 32 bit chaining values h0..h4 */
+	uint64_t length;	/* bits processed so far; sha1_nextBlock adds 512 per block */
+} sha1_ctx_t;
+
+/** \typedef sha1_hash_t
+ * \brief hash value type
+ * A variable of this type may hold a SHA-1 hash value 
+ */
+typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
+
+/** \fn sha1_init(sha1_ctx_t *state)
+ * \brief initializes a SHA-1 context
+ * This function sets a ::sha1_ctx_t variable to the initialization vector
+ * for SHA-1 hashing.
+ * \param state pointer to the SHA-1 context variable
+ */
+void sha1_init(sha1_ctx_t *state);
+
+/** \fn sha1_nextBlock(sha1_ctx_t *state, const void* block)
+ *  \brief process one input block
+ * This function processes one input block and updates the hash context 
+ * accordingly
+ * \param state pointer to the state variable to update
+ * \param block pointer to the message block to process
+ */
+void sha1_nextBlock (sha1_ctx_t *state, const void* block);
+
+/** \fn sha1_lastBlock(sha1_ctx_t *state, const void* block, uint16_t length_b)
+ * \brief processes the given block and finalizes the context
+ * This function processes the last block in a SHA-1 hashing process.
+ * The block should have a maximum length of a single input block.
+ * \param state pointer to the state variable to update and finalize
+ * \param block pointer to the message block to process
+ * \param length_b length of the message block in bits  
+ */
+void sha1_lastBlock (sha1_ctx_t *state, const void* block, uint16_t length_b);
+
+/** \fn sha1_ctx2hash(sha1_hash_t *dest, const sha1_ctx_t *state)
+ * \brief convert a state variable into an actual hash value
+ * Writes the hash value corresponding to the state to the memory pointed to by dest.
+ * \param dest pointer to the hash value destination
+ * \param state pointer to the hash context (only read, never modified)
+ */ 
+void sha1_ctx2hash (sha1_hash_t *dest, const sha1_ctx_t *state);
+
+/** \fn sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b)
+ * \brief hashing a message which is located entirely in RAM
+ * This function automatically hashes a message which is entirely in RAM with
+ * the SHA-1 hashing algorithm.
+ * \param dest pointer to the hash value destination
+ * \param msg  pointer to the message which should be hashed
+ * \param length_b length of the message in bits
+ */ 
+void sha1(sha1_hash_t *dest, const void* msg, uint32_t length_b);
+
+
+
+#endif /*SHA1_H_*/
diff --git a/shacal1/shacal1_enc.c b/shacal1/shacal1_enc.c
new file mode 100644
index 0000000..634f18d
--- /dev/null
+++ b/shacal1/shacal1_enc.c
@@ -0,0 +1,54 @@
+/* shacal1_enc.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	shacal1_enc.c
+ * \author	Daniel Otte
+ * \date	2008-05-06
+ * \par License:
+ * GPL
+ * \brief SHACAL1 encryption only implementation.
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "sha1.h"
+#include "shacal1_enc.h"
+
+void shacal1_enc(void* buffer, void* key, uint16_t keysize_b){
+	/* encrypts the 160 bit block at buffer in place: one SHA-1 block operation keyed with the zero padded key (at most 512 bit) */
+	sha1_ctx_t ctx, t_ctx;
+	uint8_t i;
+	uint8_t keybuffer[SHA1_BLOCK_BITS/8];
+
+	memset(keybuffer, 0, SHA1_BLOCK_BITS/8);
+	if(keysize_b>SHA1_BLOCK_BITS)
+		keysize_b=SHA1_BLOCK_BITS;
+	memcpy(keybuffer, key, (keysize_b+7)/8);
+	ctx.length = 0; /* sha1_nextBlock() updates the length field, so give it a defined value */
+	memcpy(t_ctx.h, buffer, SHA1_HASH_BITS/8);
+	sha1_ctx2hash((sha1_hash_t*)(&(ctx.h[0])), &t_ctx); /* byte-swap every 32 bit word of the input into ctx.h */
+	memcpy(t_ctx.h, ctx.h, SHA1_HASH_BITS/8); /* remember the input state */
+	sha1_nextBlock(&ctx, keybuffer);
+	for(i=0; i<5; ++i)
+		ctx.h[i] -= t_ctx.h[i]; /* take out the input state that sha1_nextBlock() added to the result */
+	sha1_ctx2hash(buffer, &ctx);
+}
+
+
diff --git a/shacal1/shacal1_enc.h b/shacal1/shacal1_enc.h
new file mode 100644
index 0000000..e1bdd98
--- /dev/null
+++ b/shacal1/shacal1_enc.h
@@ -0,0 +1,31 @@
+/* shacal1_enc.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef SHACAL1_ENC_H_
+#define SHACAL1_ENC_H_
+
+#include <stdint.h>
+
+#define SHACAL1_BLOCKSIZE 160 /* block size in bits */
+#define SHACAL1_BLOCKSIZE_B ((SHACAL1_BLOCKSIZE+7)/8) /* block size in bytes */
+#define SHACAL1_KEYSIZE 512 /* key size in bits */
+#define SHACAL1_KEYSIZE_B ((SHACAL1_KEYSIZE+7)/8) /* key size in bytes */
+/* encrypts the block at buffer in place; keysize_b is the length of key in bits */
+void shacal1_enc(void* buffer, void* key, uint16_t keysize_b);
+
+#endif /*SHACAL1_ENC_H_*/
diff --git a/shacal1_enc.c b/shacal1_enc.c
deleted file mode 100644
index 634f18d..0000000
--- a/shacal1_enc.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* shacal1_enc.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	shacal1_enc.c
- * \author	Daniel Otte
- * \date	2008-05-06
- * \par License:
- * GPL
- * \brief SHACAL1 encryption only implementation.
- * 
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "sha1.h"
-#include "shacal1_enc.h"
-
-void shacal1_enc(void* buffer, void* key, uint16_t keysize_b){
-	sha1_ctx_t ctx, t_ctx;
-	uint8_t i;
-	memcpy(t_ctx.h, buffer, SHA1_HASH_BITS/8);
-	
-	uint8_t keybuffer[SHA1_BLOCK_BITS/8];
-	memset(keybuffer, 0, SHA1_BLOCK_BITS/8);
-	if(keysize_b>SHA1_BLOCK_BITS)
-		keysize_b=SHA1_BLOCK_BITS;
-	memcpy(keybuffer, key, (keysize_b+7)/8);
-	
-	memcpy(t_ctx.h, buffer, SHA1_HASH_BITS/8);
-	sha1_ctx2hash((sha1_hash_t*)(&(ctx.h[0])), &t_ctx);
-	memcpy(t_ctx.h, ctx.h, SHA1_HASH_BITS/8);
-	sha1_nextBlock(&ctx, keybuffer);
-	for(i=0; i<5; ++i)
-		ctx.h[i] -= t_ctx.h[i];
-	sha1_ctx2hash(buffer, &ctx);
-}
-
-
diff --git a/shacal1_enc.h b/shacal1_enc.h
deleted file mode 100644
index e1bdd98..0000000
--- a/shacal1_enc.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* shacal1_enc.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef SHACAL1_ENC_H_
-#define SHACAL1_ENC_H_
-
-#include <stdint.h>
-
-#define SHACAL1_BLOCKSIZE 160
-#define SHACAL1_BLOCKSIZE_B ((SHACAL1_BLOCKSIZE+7)/8)
-#define SHACAL1_KEYSIZE 512
-#define SHACAL1_KEYSIZE_B ((SHACAL1_KEYSIZE+7)/8)
-
-void shacal1_enc(void* buffer, void* key, uint16_t keysize_b);
-
-#endif /*SHACAL1_ENC_H_*/
diff --git a/shacal2/sha256-asm.S b/shacal2/sha256-asm.S
new file mode 100644
index 0000000..d9eb6b6
--- /dev/null
+++ b/shacal2/sha256-asm.S
@@ -0,0 +1,1042 @@
+/* sha256-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ *
+ * License: GPLv3 or later
+*/
+; sha-256 implementation in assembler	
+SHA256_BLOCK_BITS = 512
+SHA256_HASH_BITS = 256
+
+.macro precall
+	/* save r0, r1, r18 - r27, r30 - r31 and clear r1 before calling into other code */
+	push r0
+	push r1
+	push r18
+	push r19
+	push r20
+	push r21
+	push r22
+	push r23
+	push r24
+	push r25
+	push r26
+	push r27
+	push r30
+	push r31
+	clr r1 /* r1 is used as scratch in this file; presumably the callee expects it to be zero */
+.endm
+
+.macro postcall
+	pop r31 /* restore everything precall saved, in reverse order */
+	pop r30
+	pop r27
+	pop r26
+	pop r25
+	pop r24
+	pop r23
+	pop r22
+	pop r21
+	pop r20
+	pop r19
+	pop r18
+	pop r1
+	pop r0
+.endm
+
+
+.macro hexdump length
+	push r27 /* keep X (pointer to the data) across the uart_putc calls */
+	push r26
+	ldi r25, '\r'
+	mov r24, r25
+	call uart_putc
+	ldi r25, '\n'
+	mov r24, r25
+	call uart_putc
+	pop r26
+	pop r27
+	movw r24, r26 /* r25:r24 = X; presumably the data pointer argument of uart_hexdump */
+.if \length > 16
+	ldi r22, lo8(16) /* more than 16 bytes left: dump 16 of them, then recurse for the rest */
+	ldi r23, hi8(16)
+	push r27
+	push r26
+	call uart_hexdump
+	pop r26
+	pop r27
+	adiw r26, 16 /* advance X past the bytes just dumped */
+	hexdump \length-16
+.else
+	ldi r22, lo8(\length) /* 16 bytes or less left: dump them in one go */
+	ldi r23, hi8(\length)
+	call uart_hexdump
+.endif
+.endm
+
+/* debug helper: dumps length bytes starting at X; the registers saved by precall are restored afterwards */
+.macro dbg_hexdump length
+	precall
+	hexdump \length
+	postcall
+.endm
+
+.section .text
+
+SPL = 0x3D
+SPH = 0x3E
+SREG = 0x3F
+
+
+;
+;sha256_ctx_t is:
+;
+; [h0][h1][h2][h3][h4][h5][h6][h7][length]
+; hn is 32 bit large, length is 64 bit large
+
+;###########################################################	
+
+.global sha256_ctx2hash
+; === sha256_ctx2hash ===
+; this function converts a state into a normal hash (bytestring):
+; the eight 32-bit state words are copied with the bytes of each word reversed
+;  param1: the 16-bit destination pointer, given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to sha256_ctx structure
+;	given in r23,r22
+sha256_ctx2hash:
+	movw r26, r22 /* X = ctx */
+	movw r30, r24 /* Z = destination */
+	ldi r21, 8 /* word counter */
+	sbiw r26, 4 /* bias X so that the +8 below lands one past the current word */
+1:	
+	ldi r20, 4 /* byte counter */
+	adiw r26, 8 /* X = end of the word to copy (previous pass walked 4 bytes back) */
+2:	
+		ld r0, -X /* read the word back to front ... */
+		st Z+, r0	/* ... and store it front to back */
+	dec r20
+	brne 2b
+	
+	dec r21
+	brne 1b
+	
+	ret
+
+;###########################################################	
+
+.global sha256
+; === sha256 ===
+; this function calculates SHA-256 hashes from messages in RAM
+;  param1: the 16-bit hash destination pointer
+;	given in r25,r24 (r25 is most significant)
+;  param2: the 16-bit pointer to message
+;	given in r23,r22
+;  param3: 32-bit length value (length of message in bits)
+;   given in r21,r20,r19,r18
+sha256:
+sha256_prolog:
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r16
+	push r17
+	in r16, SPL
+	in r17, SPH
+	subi r16, 8*4+8 	/* reserve room for a sha256_ctx_t (8 state words + 64-bit length) */
+	sbci r17, 0	
+	in r0, SREG
+	cli			/* SP is written in two halves, keep interrupts out meanwhile */
+	out SPL, r16
+	out SPH, r17
+	out SREG, r0
+	
+	push r25		/* destination pointer, needed again for sha256_ctx2hash */
+	push r24
+	subi r16, lo8(-1)	/* r17:r16 = SP+1 = &ctx; 16-bit increment (inc leaves the carry flag untouched) */
+	sbci r17, hi8(-1)
+	
+	movw r8, r18		/* backup of length*/
+	movw r10, r20
+	
+	movw r12, r22	/* backup of msg-ptr */
+	
+	movw r24, r16
+	rcall sha256_init
+	/* while length >= 512 bits: hash one full block */
+1:
+	tst r11
+	brne 2f		/* bits 24..31 of length set: a full block is left */
+	tst r10
+	brne 2f		/* bits 16..23 of length set: a full block is left */
+	mov r19, r9
+	cpi r19, 0x02
+	brlo 4f		/* fewer than 512 bits left: finalize */
+2:
+	movw r24, r16
+	movw r22, r12
+	rcall sha256_nextBlock
+	ldi r19, 64		/* msg-ptr += 64 bytes, kept in r13:r12 which survives the call */
+	add r12, r19
+	adc r13, r1
+	/* length -= 512 */
+	ldi r19, 0x02
+	sub r9, r19
+	sbc r10, r1
+	sbc r11, r1
+	rjmp 1b
+	
+4:
+	movw r24, r16
+	movw r22, r12
+	movw r20, r8		/* remaining length in bits (< 512) */
+	rcall sha256_lastBlock
+	
+	pop r24
+	pop r25
+	movw r22, r16
+	rcall sha256_ctx2hash	
+	
+sha256_epilog:
+	in r30, SPL
+	in r31, SPH
+	adiw r30, 8*4+8 	/* release the ctx */
+	in r0, SREG
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG, r0
+	pop r17
+	pop r16
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	ret
+
+;###########################################################	
+
+
+; a block longer than 64 bytes is hashed in 64-byte steps first, the remainder is padded
+
+.global sha256_lastBlock
+; === sha256_lastBlock ===
+; this function does padding & Co. for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to the block to hash
+;	given in r23,r22
+;  param3: a 16-bit integer specifying length of block in bits
+;	given in r21,r20
+sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1)
+
+sha256_lastBlock:
+	cpi r21, 0x02
+	brlo sha256_lastBlock_prolog	/* fewer than 512 bits left: start padding */
+	push r25
+	push r24
+	push r23
+	push r22
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r22
+	pop r23
+	pop r24
+	pop r25
+	subi r21, 0x02		/* length -= 512 bits */
+	subi r22, lo8(-64)	/* block pointer += 64 bytes (one 512-bit block) */
+	sbci r23, hi8(-64)
+	rjmp sha256_lastBlock	
+sha256_lastBlock_prolog:
+	/* allocate space on stack */
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG	/* r1 is borrowed as scratch here, it is cleared again before it is used as zero */
+	subi r30, lo8(64)
+	sbci r31, hi8(64)
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+
+	adiw r30, 1 /* SP points to next free byte on stack */
+	mov r18, r20 /* r20 = LSB(length) */
+	lsr r18
+	lsr r18
+	lsr r18
+	bst r21, 0	/* bit 8 of the bit length is bit 5 of the byte length */
+	bld r18, 5  /* now: r18 == length/8 (aka. length in bytes) */
+	
+	
+	movw r26, r22 /* X points to begin of msg */
+	tst r18
+	breq sha256_lastBlock_post_copy
+	mov r1, r18
+sha256_lastBlock_copy_loop:
+	ld r0, X+
+	st Z+, r0
+	dec r1
+	brne sha256_lastBlock_copy_loop
+sha256_lastBlock_post_copy:	
+sha256_lastBlock_insert_stuffing_bit:	
+	ldi r19, 0x80
+	mov r0,r19 	
+	ldi r19, 0x07
+	and r19, r20 /* if we are in bitmode */
+	breq 2f	/* no bitmode */
+1:	
+	lsr r0		/* move the stuffing bit behind the last message bit */
+	dec r19
+	brne 1b
+	ld r19, X
+/* the unused low bits of the last message byte are not masked, the caller has to keep them zero */
+	or r0, r19
+2:	
+	st Z+, r0
+	inc r18
+
+/* checking stuff here */
+	cpi r18, 64-8+1
+	brsh 0f 
+	rjmp sha256_lastBlock_insert_zeros
+0:
+	/* no room left for the 8 length bytes in this block */
+	/* first we have to fill it up with zeros */
+	ldi r19, 64
+	sub r19, r18
+	breq 2f
+1:	
+	st Z+, r1	/* r1 is zero here, the copy loop above counted it down */
+	dec r19
+	brne 1b	
+2:	
+	sbiw r30, 63
+	sbiw r30,  1
+	movw r22, r30
+	
+	push r31
+	push r30
+	push r25
+	push r24
+	push r21
+	push r20
+	rcall sha256_nextBlock
+	pop r20
+	pop r21
+	pop r24
+	pop r25
+	pop r30
+	pop r31
+	
+	/* sha256_nextBlock counted 512 bits for this padding block, take them off again */
+	movw r26, r24
+	adiw r26, 4*8+1 /* we can skip the lowest byte */
+	ld r19, X
+	subi r19, hi8(512)
+	st X+, r19
+	ldi r18, 6
+1:
+	ld r19, X
+	sbci r19, 0
+	st X+, r19
+	dec r18
+	brne 1b
+	
+;	clr r18 /* not necessary, the loop above leaves r18 at zero */
+	/* Z points to begin of block again (restored by the pops above) */
+
+sha256_lastBlock_insert_zeros:	
+	ldi r19, 64-8
+	sub r19, r18
+	breq sha256_lastBlock_insert_length
+	clr r1
+1:
+	st Z+, r1	/* r1 is still zero */
+	dec r19
+	brne 1b
+
+;	rjmp sha256_lastBlock_epilog
+sha256_lastBlock_insert_length:
+	movw r26, r24	/* X points to state */
+	adiw r26, 8*4	/* X points to (state.length) */
+	adiw r30, 8		/* Z points one after the last byte of block */
+	ld r0, X+
+	add r0, r20	/* total length = state.length + length of this block */
+	st -Z, r0	/* stored back to front: most significant byte first in the block */
+	ld r0, X+
+	adc r0, r21
+	st -Z, r0
+	ldi r19, 6
+1:
+	ld r0, X+
+	adc r0, r1
+	st -Z, r0
+	dec r19
+	brne 1b
+
+	sbiw r30, 64-8
+	movw r22, r30
+	rcall sha256_nextBlock
+
+sha256_lastBlock_epilog:
+	in r30, SPL
+	in r31, SPH
+	in r1, SREG
+	adiw r30, 63 ; release the 64 byte block buffer, adiw can add at most 63
+	adiw r30,  1
+	cli
+	out SPL, r30
+	out SPH, r31
+	out SREG,r1
+	clr r1
+	clr r0
+	ret
+
+/**/
+;###########################################################	
+
+.global sha256_nextBlock
+; === sha256_nextBlock ===
+; this is the core function for calculating SHA-256 hashes
+;  param1: the 16-bit pointer to sha256_ctx structure
+;	given in r25,r24 (r25 is most significant)
+;  param2: a 16-bit pointer to 64 byte block to hash
+;	given in r23,r22
+sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
+
+Bck1 = 12	/* r15:r12 backup / scratch word */
+Bck2 = 13
+Bck3 = 14
+Bck4 = 15
+Func1 = 22	/* r25:r22 argument and result of bitrotl / bitrotr */
+Func2 = 23
+Func3 = 24
+Func4 = 25
+Accu1 = 16	/* r19:r16 accumulator */
+Accu2 = 17
+Accu3 = 18
+Accu4 = 19
+XAccu1 = 8	/* r11:r8 xor accumulator */
+XAccu2 = 9
+XAccu3 = 10
+XAccu4 = 11
+T1	= 4	/* r7:r4 holds t1 in the main loop */
+T2	= 5
+T3	= 6
+T4	= 7
+LoopC = 1	/* r1 is the loop counter, it is cleared again in the epilog */
+/* byteorder: high number <--> high significance */
+sha256_nextBlock:
+ ; initial, let's make some space ready for local vars
+	push r4 /* replace push & pop by mem ops? */
+	push r5
+	push r6
+	push r7
+	push r8
+	push r9
+	push r10
+	push r11
+	push r12
+	push r13
+	push r14
+	push r15
+	push r16
+	push r17
+	push r28
+	push r29
+	in r20, SPL
+	in r21, SPH
+	movw r18, r20			;backup SP
+;	movw r26, r20			; X points to free space on stack 
+	movw r30, r22			; Z points to message
+	subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
+	sbci r21, hi8(sha256_nextBlock_localSpace)
+	movw r26, r20			; X points to free space on stack 
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	push r18 /* old SP, popped in the epilog to drop the local space */
+	push r19
+	push r24
+	push r25 /* param1 will be needed later */
+ ; now we fill the w array with message (think about endianess)
+ 	adiw r26, 1 ; X++
+ 	ldi r20, 16
+sha256_nextBlock_wcpyloop: 	
+ 	ld r23, Z+ /* the four bytes of a message word are read ... */
+ 	ld r22, Z+
+ 	ld r19, Z+
+ 	ld r18, Z+
+ 	st X+, r18 /* ... and stored in reverse order */
+ 	st X+, r19
+ 	st X+, r22	
+	st X+, r23
+	dec r20
+	brne sha256_nextBlock_wcpyloop
+/*	for (i=16; i<64; ++i){
+		w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];	
+	} */
+	/* r25,r24,r23,r22 (r21,r20) are function values
+	   r19,r18,r17,r16 are the accumulator
+	   r15,r14,r13,r12 are backup1
+	   r11,r10,r9 ,r8  are xor accu   
+	   r1 is round counter 								*/
+
+	ldi r20, 64-16
+	mov LoopC, r20
+sha256_nextBlock_wcalcloop:		 
+	movw r30, r26 ; cp X to Z
+	sbiw r30, 63
+	sbiw r30, 1 		; subtract 64 = 16*4
+	ld Accu1, Z+
+	ld Accu2, Z+
+	ld Accu3, Z+
+	ld Accu4, Z+ /* w[i] = w[i-16] */
+	ld Bck1, Z+
+	ld Bck2, Z+
+	ld Bck3, Z+
+	ld Bck4, Z+ /* backup = w[i-15] */
+	/* now sigma 0 */
+	mov Func1, Bck2
+	mov Func2, Bck3
+	mov Func3, Bck4
+	mov Func4, Bck1  /* prerotated by 8 */
+	ldi r20, 1
+	rcall bitrotl
+	movw XAccu1, Func1
+	movw XAccu3, Func3	 /* store ROTR(w[i-15],7) in xor accu */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-15], 18)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 3		 /* now shr3 */ /*we can destroy backup now*/
+sigma0_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2
+	ror Bck1	
+	dec Func2
+	brne sigma0_shr
+	eor XAccu1, Bck1
+	eor XAccu2, Bck2
+	eor XAccu3, Bck3
+	eor XAccu4, Bck4	/* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma0 */
+	ldd Func1, Z+7*4  /* now accu += w[i-7] */
+	ldd Func2, Z+7*4+1
+	ldd Func3, Z+7*4+2
+	ldd Func4, Z+7*4+3
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
+	ldd Bck2, Z+12*4+1
+	ldd Bck3, Z+12*4+2
+	ldd Bck4, Z+12*4+3
+	/* now sigma 1 */
+	movw Func1, Bck3
+	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 1
+	rcall bitrotr
+	movw XAccu3, Func3
+	movw XAccu1, Func1	 /* store ROTR(w[i-2], 17) in xor accu */
+;	movw Func1, Bck3
+;	movw Func3, Bck1 /* prerotated by 16 */
+	ldi r20, 2 /* Func still holds ROTR(w[i-2], 17), two more bits give 19 */
+	rcall bitrotr
+	eor XAccu1, Func1  /* xor ROTR(w[i-2], 19)*/
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	ldi Func2, 2	 /* now shr10 (dirty trick, skipping a byte) */ /*we can destroy backup now*/
+sigma1_shr:
+	lsr Bck4
+	ror Bck3
+	ror Bck2	
+	dec Func2
+	brne sigma1_shr
+	eor XAccu1, Bck2
+	eor XAccu2, Bck3
+	eor XAccu3, Bck4  /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
+	add Accu1, XAccu1
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4 /* finished with sigma1 */
+	/* now store the new w[i] */
+	st X+, Accu1
+	st X+, Accu2
+	st X+, Accu3
+	st X+, Accu4
+	dec LoopC
+	breq 3f  ; skip if zero
+	rjmp sha256_nextBlock_wcalcloop
+3:
+	/* we are finished with w array X points one byte post w */
+/* init a array */
+	pop r31 /* fetch param1 (ctx pointer) from the stack ... */
+	pop r30
+	push r30 /* ... and put it back, it is needed once more in update_state */
+	push r31
+	ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
+init_a_array:	
+	ld r1, Z+
+	st X+, r1
+	dec r25
+	brne init_a_array
+	
+/* now the real fun begins */
+/* for (i=0; i<64; ++i){
+			t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
+			t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
+			memmove(&(a[1]), &(a[0]), 7*4); 	// a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; 
+			a[4] += t1;
+			a[0] = t1 + t2;
+		} */
+	/* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
+	sbiw r26, 8*4  /* X pointed one past a[7], now it points to a[0] */
+	movw r28, r26
+	ldi r30, lo8(sha256_kv)
+	ldi r31, hi8(sha256_kv)		
+	dec r27  /* X - (64*4 == 256), X now points to w[0] */
+	ldi r25, 64
+	mov LoopC, r25
+sha256_main_loop:
+	/* now calculate t1 */
+	 /*CH(x,y,z) = (x&y)^((~x)&z)*/
+	ldd T1, Y+5*4
+	ldd T2, Y+5*4+1
+	ldd T3, Y+5*4+2
+	ldd T4, Y+5*4+3 /* y in T */
+	ldd Func1, Y+4*4
+	ldd Func2, Y+4*4+1
+	ldd Func3, Y+4*4+2
+	ldd Func4, Y+4*4+3  /* x in Func */
+	ldd Bck1, Y+6*4
+	ldd Bck2, Y+6*4+1
+	ldd Bck3, Y+6*4+2
+	ldd Bck4, Y+6*4+3 /* z in Bck */
+	and T1, Func1
+	and T2, Func2
+	and T3, Func3
+	and T4, Func4
+	com Func1
+	com Func2
+	com Func3
+	com Func4
+	and Bck1, Func1
+	and Bck2, Func2
+	and Bck3, Func3
+	and Bck4, Func4
+	eor T1, Bck1
+	eor T2, Bck2
+	eor T3, Bck3
+	eor T4, Bck4 /* done, CH(x,y,z) is in T */
+	/* now SIGMA1(a[4]) */
+	ldd Bck4, Y+4*4		/* think about using it from Func reg above*/
+	ldd Bck1, Y+4*4+1	
+	ldd Bck2, Y+4*4+2
+	ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */	
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2 
+	rcall bitrotl		/* rotr(x,6) */ 
+	movw XAccu1, Func1
+	movw XAccu3, Func3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 3 
+	rcall bitrotr 	/* rotr(x,11) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4
+	movw Func1, Bck3 /* this prerotates further 16 bits*/
+	movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
+	ldi r20, 1 
+	rcall bitrotr 	/* rotr(x,25) */
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4
+	/* now we've to add a[7], w[i] and k[i] */
+	ldd XAccu1, Y+4*7
+	ldd XAccu2, Y+4*7+1
+	ldd XAccu3, Y+4*7+2
+	ldd XAccu4, Y+4*7+3
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add a[7] */
+	ld XAccu1, X+
+	ld XAccu2, X+
+	ld XAccu3, X+
+	ld XAccu4, X+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add w[i] */
+	lpm XAccu1, Z+
+	lpm XAccu2, Z+
+	lpm XAccu3, Z+
+	lpm XAccu4, Z+
+	add T1, XAccu1
+	adc T2, XAccu2
+	adc T3, XAccu3
+	adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
+	/*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
+		/* starting with MAJ(x,y,z) */
+	ldd Func1, Y+4*0+0
+	ldd Func2, Y+4*0+1
+	ldd Func3, Y+4*0+2
+	ldd Func4, Y+4*0+3 /* load x=a[0] */
+	ldd XAccu1, Y+4*1+0
+	ldd XAccu2, Y+4*1+1
+	ldd XAccu3, Y+4*1+2
+	ldd XAccu4, Y+4*1+3 /* load y=a[1] */
+	and XAccu1, Func1
+	and XAccu2, Func2
+	and XAccu3, Func3
+	and XAccu4, Func4	/* XAccu == (x & y) */
+	ldd Bck1, Y+4*2+0
+	ldd Bck2, Y+4*2+1
+	ldd Bck3, Y+4*2+2
+	ldd Bck4, Y+4*2+3 /* load z=a[2] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == (x & y) ^ (x & z) */
+	ldd Func1, Y+4*1+0
+	ldd Func2, Y+4*1+1
+	ldd Func3, Y+4*1+2
+	ldd Func4, Y+4*1+3 /* load y=a[1] */
+	and Func1, Bck1
+	and Func2, Bck2
+	and Func3, Bck3
+	and Func4, Bck4
+	eor XAccu1, Func1
+	eor XAccu2, Func2
+	eor XAccu3, Func3
+	eor XAccu4, Func4	/* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
+   	/* SIGMA0(a[0]) */
+	ldd Bck1, Y+4*0+0 /* we should combine this with above */
+	ldd Bck2, Y+4*0+1
+	ldd Bck3, Y+4*0+2
+	ldd Bck4, Y+4*0+3
+	movw Func1, Bck1
+	movw Func3, Bck3
+	ldi r20, 2
+	rcall bitrotr
+	movw Accu1, Func1
+	movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
+	movw Func1, Bck3 
+	movw Func3, Bck1 /* prerotate by 16 bits */
+	ldi r20, 3
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
+	mov Func1, Bck4
+	mov Func2, Bck1
+	mov Func3, Bck2
+	mov Func4, Bck3  /* prerotate by 24 bits */
+	ldi r20, 2
+	rcall bitrotl
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
+	add Accu1, XAccu1 /* add previous result (MAJ)*/
+	adc Accu2, XAccu2
+	adc Accu3, XAccu3
+	adc Accu4, XAccu4
+	/* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
+	/* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
+
+	ldi r21, 7*4
+	adiw r28, 7*4
+a_shift_loop:
+	ld  r25, -Y /* warning: this is PREdecrement */
+	std Y+4, r25
+	dec r21
+	brne a_shift_loop
+
+	ldd Bck1, Y+4*4+0 /* Y is back at a[0]; a[4] += t1 */
+	ldd Bck2, Y+4*4+1
+	ldd Bck3, Y+4*4+2
+	ldd Bck4, Y+4*4+3
+	add Bck1, T1
+	adc Bck2, T2
+	adc Bck3, T3
+	adc Bck4, T4
+	std Y+4*4+0, Bck1
+	std Y+4*4+1, Bck2
+	std Y+4*4+2, Bck3
+	std Y+4*4+3, Bck4
+	add Accu1, T1 /* a[0] = t1 + t2 */
+	adc Accu2, T2
+	adc Accu3, T3
+	adc Accu4, T4
+	std Y+4*0+0, Accu1
+	std Y+4*0+1, Accu2
+	std Y+4*0+2, Accu3
+	std Y+4*0+3, Accu4 /* a array updated */
+	
+	
+	dec LoopC
+	breq update_state
+	rjmp sha256_main_loop ;brne sha256_main_loop
+update_state:	
+	/* update state */
+	/* pointers to state should still exist on the stack ;-) */
+	pop r31
+	pop r30
+	ldi r21, 8
+update_state_loop:
+	ldd Accu1, Z+0 /* state.h[n] += a[n] */
+	ldd Accu2, Z+1
+	ldd Accu3, Z+2
+	ldd Accu4, Z+3 
+	ld Func1, Y+
+	ld Func2, Y+
+	ld Func3, Y+
+	ld Func4, Y+
+	add Accu1, Func1
+	adc Accu2, Func2
+	adc Accu3, Func3
+	adc Accu4, Func4
+	st Z+, Accu1
+	st Z+, Accu2
+	st Z+, Accu3
+	st Z+, Accu4
+	dec r21
+	brne update_state_loop
+	/* now we just have to update the length */
+	adiw r30, 1 /* since we add 512, we can simply skip the LSB */ 
+	ldi r21, 2
+	ldi r22, 6
+	ld r20, Z
+	add r20, r21
+	st Z+, r20	
+	clr r21 /* clr leaves the carry of the add above intact */
+sha256_nextBlock_fix_length:	
+	brcc sha256_nextBlock_epilog /* stop as soon as no carry is left to propagate */
+	ld r20, Z
+	adc r20, r21
+	st Z+, r20
+	dec r22
+	brne sha256_nextBlock_fix_length
+	
+; EPILOG
+sha256_nextBlock_epilog:
+/* now we should clean up the stack */
+	
+	pop r21 /* the SP value saved in the prolog */
+	pop r20
+	in r0, SREG
+	cli ; we want to be uninterrupted while updating SP
+	out SPL, r20
+	out SPH, r21
+	out SREG, r0
+	
+	clr r1 /* r1 was the loop counter, leave it as zero */
+	pop r29
+	pop r28
+	pop r17
+	pop r16
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop r11
+	pop r10
+	pop r9
+	pop r8
+	pop r7
+	pop r6
+	pop r5
+	pop r4 
+	ret
+
+sha256_kv: ; round-key-vector stored in ProgMem 
+.word	0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
+.word	0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
+.word	0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
+.word	0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
+.word	0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
+.word	0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
+.word	0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
+.word	0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
+
+	
+;###########################################################	
+
+.global sha256_init 
+;uint32_t sha256_init_vector[]={
+;  	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+;	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
+;
+;void sha256_init(sha256_ctx_t *state){
+;	state->length=0;
+;	memcpy(state->h, sha256_init_vector, 8*4);
+;}
+; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
+; modifies: X(r26,r27), Z(r30,r31), r22, r23
+sha256_init:
+	movw r26, r24 ; (24,25) --> (26,27) load X with param1
+	ldi r30, lo8((sha256_init_vector))
+	ldi r31, hi8((sha256_init_vector))
+	ldi r22, 32+8 /* 8 state words plus the 8 length bytes */
+sha256_init_vloop:	
+	lpm r23, Z+ 
+	st X+, r23
+	dec r22
+	brne sha256_init_vloop
+	ret
+	
+sha256_init_vector: /* each 32-bit value is written as low word, high word */
+.word 0xE667, 0x6A09
+.word 0xAE85, 0xBB67 
+.word 0xF372, 0x3C6E 
+.word 0xF53A, 0xA54F 
+.word 0x527F, 0x510E 
+.word 0x688C, 0x9B05 
+.word 0xD9AB, 0x1F83 
+.word 0xCD19, 0x5BE0
+.word 0x0000, 0x0000 /* initial value of the 64-bit length field */
+.word 0x0000, 0x0000
+
+;###########################################################	
+
+.global rotl32
+; === ROTL32 ===
+; function that rotates a 32 bit word to the left
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotl32:
+	cpi r20, 8
+	brlo bitrotl /* fewer than 8 bits left: finish bit by bit */
+	mov r21, r25 /* rotate left by a whole byte */
+	mov r25, r24
+	mov r24, r23
+	mov r23, r22
+	mov r22, r21
+	subi r20, 8
+	rjmp rotl32
+bitrotl: /* also called directly by sha256_nextBlock with a count in r20 below 8 */
+	clr r21
+	clc
+bitrotl_loop:	
+	tst r20
+	breq fixrotl
+	rol r22
+	rol r23
+	rol r24
+	rol r25
+	rol r21 /* bits leaving the top of r25 are collected in r21 */
+	dec r20
+	rjmp bitrotl_loop
+fixrotl:
+	or r22, r21 /* feed the collected bits back in at the bottom */
+	ret
+	
+
+;###########################################################	
+
+.global rotr32
+; === ROTR32 ===
+; function that rotates a 32 bit word to the right
+;  param1: the 32-bit word to rotate
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  param2: an 8-bit value telling how often to rotate
+;	given in r20
+; modifies: r20, r21; the rotated word is left in r25,r24,r23,r22
+rotr32:
+	cpi r20, 8
+	brlo bitrotr /* fewer than 8 bits left: finish bit by bit */
+	mov r21, r22 /* rotate right by a whole byte */
+	mov r22, r23
+	mov r23, r24
+	mov r24, r25
+	mov r25, r21
+	subi r20, 8
+	rjmp rotr32
+bitrotr: /* also called directly by sha256_nextBlock with a count in r20 below 8 */
+	clr r21
+	clc
+bitrotr_loop:	
+	tst r20
+	breq fixrotr
+	ror r25
+	ror r24
+	ror r23
+	ror r22
+	ror r21 /* bits leaving the bottom of r22 are collected in r21 */
+	dec r20
+	rjmp bitrotr_loop
+fixrotr:
+	or r25, r21 /* feed the collected bits back in at the top */
+	ret
+	
+	
+;###########################################################	
+	
+.global change_endian32
+; === change_endian32 ===
+; function that changes the endianess of a 32-bit word
+;  param1: the 32-bit word
+;	given in r25,r24,r23,r22 (r25 is most significant)
+;  modifies: r20, r21; the byte-swapped word is left in r25,r24,r23,r22
+change_endian32:
+	movw r20,  r22 ; (r22,r23) --> (r20,r21)
+	mov r22, r25
+	mov r23, r24
+	mov r24, r21
+	mov r25, r20 
+	ret
+
diff --git a/shacal2/sha256.h b/shacal2/sha256.h
new file mode 100644
index 0000000..24960a3
--- /dev/null
+++ b/shacal2/sha256.h
@@ -0,0 +1,122 @@
+/* sha256.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	sha256.h
+ * \author  Daniel Otte 
+ * \date    2006-05-16
+ * \license	GPLv3 or later
+ * 
+ */
+
+#ifndef SHA256_H_
+#define SHA256_H_
+
+#define __LITTLE_ENDIAN__
+
+
+#include <stdint.h>
+
+/** \def SHA256_HASH_BITS
+ * defines the size of a SHA-256 hash value in bits
+ */
+
+/** \def SHA256_HASH_BYTES
+ * defines the size of a SHA-256 hash value in bytes
+ */
+
+/** \def SHA256_BLOCK_BITS
+ * defines the size of a SHA-256 input block in bits
+ */
+
+/** \def SHA256_BLOCK_BYTES
+ * defines the size of a SHA-256 input block in bytes
+ */
+
+#define SHA256_HASH_BITS  256
+#define SHA256_HASH_BYTES (SHA256_HASH_BITS/8)
+#define SHA256_BLOCK_BITS 512
+#define SHA256_BLOCK_BYTES (SHA256_BLOCK_BITS/8)
+
+/** \typedef sha256_ctx_t
+ * \brief SHA-256 context type
+ * 
+ * A variable of this type may hold the state of a SHA-256 hashing process:
+ * the eight 32-bit chaining values and the number of bits hashed so far.
+ */
+typedef struct {
+	uint32_t h[8];
+	uint64_t length;
+} sha256_ctx_t;
+
+/** \typedef sha256_hash_t
+ * \brief SHA-256 hash value type
+ * 
+ * A variable of this type may hold the hash value produced by the
+ * sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state) function.
+ */
+typedef uint8_t sha256_hash_t[SHA256_HASH_BYTES];
+
+/** \fn void sha256_init(sha256_ctx_t *state)
+ * \brief initialise a SHA-256 context
+ * 
+ * This function sets a ::sha256_ctx_t to the initial values for hashing.
+ * \param state pointer to the SHA-256 hashing context
+ */
+void sha256_init(sha256_ctx_t *state);
+
+/** \fn void sha256_nextBlock (sha256_ctx_t* state, const void* block)
+ * \brief update the context with a given block
+ * 
+ * This function updates the SHA-256 hash context by processing the given block
+ * of fixed length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the block of fixed length (512 bit = 64 byte)
+ */
+void sha256_nextBlock (sha256_ctx_t* state, const void* block);
+
+/** \fn void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b)
+ * \brief finalize the context with the given block 
+ * 
+ * This function finalizes the SHA-256 hash context by processing the given block
+ * of variable length.
+ * \param state pointer to the SHA-256 hash context
+ * \param block pointer to the final, possibly partial, block
+ * \param length_b the length of the block in bits
+ */
+void sha256_lastBlock(sha256_ctx_t* state, const void* block, uint16_t length_b);
+
+/** \fn void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state)
+ * \brief convert the hash state into the hash value
+ * This function reads the context and writes the hash value to the destination
+ * \param dest pointer to the location where the hash value should be written
+ * \param state pointer to the SHA-256 hash context
+ */
+void sha256_ctx2hash(sha256_hash_t* dest, const sha256_ctx_t* state);
+
+/** \fn void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b)
+ * \brief simple SHA-256 hashing function for direct hashing
+ * 
+ * This function automatically hashes a given message of arbitrary length with
+ * the SHA-256 hashing algorithm.
+ * \param dest pointer to the location where the hash value is going to be written to
+ * \param msg pointer to the message that is going to be hashed
+ * \param length_b length of the message in bits
+ */
+void sha256(sha256_hash_t* dest, const void* msg, uint32_t length_b);
+
+#endif /*SHA256_H_*/
diff --git a/shacal2/shacal2_enc.c b/shacal2/shacal2_enc.c
new file mode 100644
index 0000000..b5380e5
--- /dev/null
+++ b/shacal2/shacal2_enc.c
@@ -0,0 +1,57 @@
+/* shacal2_enc.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * \file	shacal2_enc.c
+ * \author	Daniel Otte
+ * \date	2008-05-07
+ * \par License:
+ * GPL
+ * \brief SHACAL2 encryption only implementation.
+ * 
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "sha256.h"
+#include "shacal2_enc.h"
+/* Encrypts the 256-bit block in buffer in place with SHACAL-2. The key (keysize_b bits, at most 512) is zero */
+/* padded and run through the SHA-256 compression function as message block; the feed-forward is undone afterwards. */
+void shacal2_enc(void* buffer, void* key, uint16_t keysize_b){
+	uint8_t i;
+	sha256_ctx_t ctx, t_ctx;
+	ctx.length = 0; /* sha256_nextBlock() adds 512 to length; do not let it work on stack garbage */
+	/* ctx.h needs no preset here: sha256_ctx2hash() below writes all eight words */
+	uint8_t keybuffer[SHACAL2_KEYSIZE_B];
+	memset(keybuffer, 0, SHACAL2_KEYSIZE_B);
+	if(keysize_b>SHACAL2_KEYSIZE)
+		keysize_b=SHACAL2_KEYSIZE;
+	memcpy(keybuffer, key, (keysize_b+7)/8);
+	/* load the block and reverse the bytes of each 32-bit word, giving the word values the core computes with */
+	memcpy(t_ctx.h, buffer, SHACAL2_BLOCKSIZE_B);
+	sha256_ctx2hash((sha256_hash_t*)(&(ctx.h[0])), &t_ctx);
+	memcpy(t_ctx.h, ctx.h, SHACAL2_BLOCKSIZE_B); /* remember the input state */
+	sha256_nextBlock(&ctx, keybuffer);
+	for(i=0; i<SHACAL2_BLOCKSIZE/32; ++i){
+		ctx.h[i] -= t_ctx.h[i]; /* remove the input state that sha256_nextBlock() added onto the result */
+	}
+	sha256_ctx2hash(buffer, &ctx);
+}
+
+
+
diff --git a/shacal2/shacal2_enc.h b/shacal2/shacal2_enc.h
new file mode 100644
index 0000000..ffa277c
--- /dev/null
+++ b/shacal2/shacal2_enc.h
@@ -0,0 +1,33 @@
+/* shacal2_enc.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef SHACAL2_ENC_H_
+#define SHACAL2_ENC_H_
+
+#include <stdint.h>
+#include "sha256.h"
+
+#define SHACAL2_BLOCKSIZE  SHA256_HASH_BITS /* block size in bits */
+#define SHACAL2_BLOCKSIZE_B ((SHACAL2_BLOCKSIZE+7)/8) /* block size in bytes */
+#define SHACAL2_KEYSIZE SHA256_BLOCK_BITS /* maximum key size in bits */
+#define SHACAL2_KEYSIZE_B ((SHACAL2_KEYSIZE+7)/8) /* maximum key size in bytes */
+/* encrypts the block in buffer in place; keysize_b is the key length in bits, longer keys are cut to SHACAL2_KEYSIZE */
+void shacal2_enc(void* buffer, void* key, uint16_t keysize_b);
+
+
+#endif /*SHACAL2_ENC_H_*/
diff --git a/shacal2_enc.c b/shacal2_enc.c
deleted file mode 100644
index b5380e5..0000000
--- a/shacal2_enc.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/* shacal2_enc.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * \file	shacal2_enc.c
- * \author	Daniel Otte
- * \date	2008-05-07
- * \par License:
- * GPL
- * \brief SHACAL2 encryption only implementation.
- * 
- */
-
-#include <stdint.h>
-#include <string.h>
-#include "sha256.h"
-#include "shacal2_enc.h"
-
-
-void shacal2_enc(void* buffer, void* key, uint16_t keysize_b){
-	uint8_t i;
-	sha256_ctx_t ctx, t_ctx;
-	memcpy(ctx.h, buffer, SHACAL2_BLOCKSIZE_B);
-	
-	uint8_t keybuffer[SHACAL2_KEYSIZE_B];
-	memset(keybuffer, 0, SHACAL2_KEYSIZE_B);
-	if(keysize_b>SHACAL2_KEYSIZE)
-		keysize_b=SHACAL2_KEYSIZE;
-	memcpy(keybuffer, key, (keysize_b+7)/8);
-	
-	memcpy(t_ctx.h, buffer, SHACAL2_BLOCKSIZE_B);
-	sha256_ctx2hash((sha256_hash_t*)(&(ctx.h[0])), &t_ctx);
-	memcpy(t_ctx.h, ctx.h, SHACAL2_BLOCKSIZE_B);
-	sha256_nextBlock(&ctx, keybuffer);
-	for(i=0; i<SHACAL2_BLOCKSIZE/32; ++i){
-		ctx.h[i] -= t_ctx.h[i];
-	}
-	sha256_ctx2hash(buffer, &ctx);
-}
-
-
-
diff --git a/shacal2_enc.h b/shacal2_enc.h
deleted file mode 100644
index ffa277c..0000000
--- a/shacal2_enc.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* shacal2_enc.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef SHACAL2_ENC_H_
-#define SHACAL2_ENC_H_
-
-#include <stdint.h>
-#include "sha256.h"
-
-#define SHACAL2_BLOCKSIZE  SHA256_HASH_BITS
-#define SHACAL2_BLOCKSIZE_B ((SHACAL2_BLOCKSIZE+7)/8)
-#define SHACAL2_KEYSIZE SHA256_BLOCK_BITS
-#define SHACAL2_KEYSIZE_B ((SHACAL2_KEYSIZE+7)/8)
-
-void shacal2_enc(void* buffer, void* key, uint16_t keysize_b);
-
-
-#endif /*SHACAL2_ENC_H_*/
diff --git a/skipjack.c b/skipjack.c
deleted file mode 100644
index 04eb437..0000000
--- a/skipjack.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* skipjack.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * 
- * 
- * 
- * 
- * 
- */
-
-
-#include <stdint.h>
-#include <avr/io.h>
-#include <avr/pgmspace.h>
-
-#define SKIPJACK_CNT_BIG
-
-#ifdef SKIPJACK_CNT_BIG
-  #define SKIPJACK_CNT_SHIFT <<8
-#else
-  #define SKIPJACK_CNT_SHIFT
-#endif
-
-/*****************************************************************************/
-
-uint8_t skipjack_ftable[] PROGMEM ={ 
-	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4, 
-	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9, 
-	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e, 
-	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28, 
-	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68, 
-	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53, 
-	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19, 
-	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2, 
-	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b, 
-	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8, 
-	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0, 
-	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90, 
-	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69, 
-	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76, 
-	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20, 
-	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d, 
-	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43, 
-	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18, 
-	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa, 
-	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4, 
-	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87, 
-	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40, 
-	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b, 
-	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5, 
-	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0, 
-	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2, 
-	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1, 
-	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8, 
-	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5, 
-	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac, 
-	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3, 
-	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
-};
-
-/*****************************************************************************/
-
-uint16_t skipjack_g(uint16_t g, uint8_t k, uint8_t *key){
-	#define G1 (((uint8_t*)&g)[0])
-	#define G2 (((uint8_t*)&g)[1])
-	/* this could also be rolled up */
-	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+0)%10]]));
-	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+1)%10]]));
-	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+2)%10]]));
-	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+3)%10]]));
-	return g;	
-}
-
-/*****************************************************************************/
-
-uint16_t skipjack_g_inv(uint16_t g, uint8_t k, uint8_t *key){
-//	#define G1 (((uint8_t)&g)[1])
-//	#define G2 (((uint8_t)&g)[0])
-	/* this could also be rolled up */
-	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+3)%10]]));
-	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+2)%10]]));
-	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+1)%10]]));
-	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+0)%10]]));
-	return g;	
-}
-
-/*****************************************************************************/
-
-void skipjack_a(uint16_t* w, uint8_t k, uint8_t* key){
-	uint16_t t;
-
-	t = w[3];
-	w[3] = w[2];
-	w[2] = w[1];
-	w[1] = skipjack_g(w[0],k-1,key);
-	w[0] = t ^ w[1] ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT);
-}
-
-/*****************************************************************************/
-
-void skipjack_a_inv(uint16_t* w, uint8_t k, uint8_t* key){
-	uint16_t t;
-	t = w[0] ^ w[1];
-	w[0] = skipjack_g_inv(w[1],k-1,key);
-	w[1] = w[2];
-	w[2] = w[3];
-	w[3] = t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT);
-}
-
-/*****************************************************************************/
-
-void skipjack_b(uint16_t* w, uint8_t k, uint8_t* key){
-	uint16_t t;
-	t = w[0];
-	w[0] = w[3];
-	w[3] = w[2];
-	w[2] = t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT) ^ w[1];
-	w[1] = skipjack_g(t,k-1,key);
-}
-
-/*****************************************************************************/
-
-void skipjack_b_inv(uint16_t* w, uint8_t k, uint8_t* key){
-	uint16_t t;
-	t = w[2];
-	w[2] = w[3];
-	w[3] = w[0];
-	w[0] = skipjack_g_inv(w[1],k-1,key);
-	w[1] = w[0] ^ t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT);
-}
-
-/*****************************************************************************/
-/**
- * block is 64 bits (=8 bytes) in size, key is 80 bits (=10 bytes) in size.
- * 
- */
-void skipjack_enc(void* block, void* key){
-	uint8_t k;
-	for(k=0; k<32; ++k){
-		if(k & 0x08){
-			skipjack_b((uint16_t*)block, k+1, key);
-		} else {
-			skipjack_a((uint16_t*)block, k+1, key);
-		}
-	}
-}
-
-/*****************************************************************************/
-/**
- * block is 64 bits (=8 bytes) in size, key is 80 bits (=10 bytes) in size.
- * 
- */
-void skipjack_dec(void* block, void* key){
-	int8_t k;
-	for(k=31; k>=0; --k){
-		if(k & 0x08){
-			skipjack_b_inv((uint16_t*)block, k+1, key);
-		} else {
-			skipjack_a_inv((uint16_t*)block, k+1, key);
-		}
-	}
-}
-
-
-
-
-
diff --git a/skipjack.h b/skipjack.h
deleted file mode 100644
index 00f033d..0000000
--- a/skipjack.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* skipjack.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef SKIPJACK_H_
-#define SKIPJACK_H_
-
-/** 
- * \file      skipjack.h
- * \author    Daniel Otte
- * \date      2006-11-01
- * \license   GPLv3 or later
- * \brief     Implementation of the serpent sbox function.
- * 
- */
-
-
-#include <stdint.h>
-
-/** \fn void skipjack_enc(void* block, void* key)
- * \brief skipjack encryption function
- * 
- * This function encrypts a block of plaintext with the Skipjac encryption
- * algorithm. The block is 64 bit (8 byte) long, the key is 80 bit (10 byte)
- * long.
- * \param block pointer to the 64 bit (8 byte) block to encrypt
- * \param key   pointer to the 80 bit (10 byte) key 
- */
-void skipjack_enc(void* block, void* key);
-
-/** \fn void skipjack_dec(void* block, void* key)
- * \brief skipjack decryption function
- * 
- * This function decrypts a block of ciphertext encrypted with the Skipjac
- * encryption algorithm. 
- * The block is 64 bit (8 byte) long, the key is 80 bit (10 byte) long.
- * \param block pointer to the 64 bit (8 byte) block to decrypt
- * \param key   pointer to the 80 bit (10 byte) key 
- */
-void skipjack_dec(void* block, void* key);
-
-#endif /*SKIPJACK_H_*/
diff --git a/skipjack/skipjack.c b/skipjack/skipjack.c
new file mode 100644
index 0000000..04eb437
--- /dev/null
+++ b/skipjack/skipjack.c
@@ -0,0 +1,184 @@
+/* skipjack.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
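+/**
+ * \file      skipjack.c
+ * \author    Daniel Otte
+ * \license   GPLv3 or later
+ * \brief     Implementation of the Skipjack block cipher
+ *            (64 bit block, 80 bit key) for AVR.
+ * 
+ */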
+
+
+#include <stdint.h>
+#include <avr/io.h>
+#include <avr/pgmspace.h>
+
+#define SKIPJACK_CNT_BIG
+
+#ifdef SKIPJACK_CNT_BIG
+  #define SKIPJACK_CNT_SHIFT <<8
+#else
+  #define SKIPJACK_CNT_SHIFT
+#endif
+
+/** F-table: 256-entry byte lookup table kept in program memory (PROGMEM), read with pgm_read_byte_near(). */
+
+uint8_t skipjack_ftable[] PROGMEM ={ 
+	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4, 
+	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9, 
+	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e, 
+	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28, 
+	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68, 
+	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53, 
+	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19, 
+	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2, 
+	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b, 
+	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8, 
+	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0, 
+	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90, 
+	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69, 
+	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76, 
+	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20, 
+	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d, 
+	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43, 
+	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18, 
+	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa, 
+	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4, 
+	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87, 
+	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40, 
+	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b, 
+	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5, 
+	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0, 
+	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2, 
+	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1, 
+	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8, 
+	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5, 
+	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac, 
+	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3, 
+	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
+};
+
+/** G permutation: four keyed F-table steps on the two bytes of g; returns the transformed 16 bit word. */
+
+uint16_t skipjack_g(uint16_t g, uint8_t k, uint8_t *key){
+	#define G1 (((uint8_t*)&g)[0]) /* byte 0 of g in memory; left defined, skipjack_g_inv() reuses it */
+	#define G2 (((uint8_t*)&g)[1]) /* byte 1 of g in memory; left defined, skipjack_g_inv() reuses it */
+	/* unrolled (could be a loop): step i XORs F[other byte ^ key[(4*k+i)%10]] into G1 and G2 alternately */
+	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+0)%10]]));
+	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+1)%10]]));
+	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+2)%10]]));
+	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+3)%10]]));
+	return g;	
+}
+
+/** Inverse of skipjack_g(): applies the same four F-table steps in the opposite order. */
+
+uint16_t skipjack_g_inv(uint16_t g, uint8_t k, uint8_t *key){
+//	G1 and G2 are the byte-access macros #defined inside skipjack_g() above;
+//	they are never #undef'd, so here they expand to bytes of this function's own g.
+	/* unrolled (could be a loop): key bytes are used in the order 3, 2, 1, 0 */
+	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+3)%10]]));
+	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+2)%10]]));
+	G2 ^= pgm_read_byte_near(&(skipjack_ftable[G1 ^ key[(4*k+1)%10]]));
+	G1 ^= pgm_read_byte_near(&(skipjack_ftable[G2 ^ key[(4*k+0)%10]]));
+	return g;	
+}
+
+/** Round function "a" on the four words w[0..3]: new w[1] = G(w[0]), new w[0] = old w[3] ^ G(w[0]) ^ counter. */
+
+void skipjack_a(uint16_t* w, uint8_t k, uint8_t* key){
+	uint16_t t;
+
+	t = w[3]; /* keep the old last word, it is mixed into w[0] below */
+	w[3] = w[2];
+	w[2] = w[1];
+	w[1] = skipjack_g(w[0],k-1,key); /* callers pass a 1-based k; G gets the 0-based round index */
+	w[0] = t ^ w[1] ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT); /* counter is shifted <<8 while SKIPJACK_CNT_BIG is defined */
+}
+
+/** Undoes skipjack_a() for the same k and key: restores the four words w[0..3] to their previous values. */
+
+void skipjack_a_inv(uint16_t* w, uint8_t k, uint8_t* key){
+	uint16_t t;
+	t = w[0] ^ w[1]; /* w[1] is G(old w[0]), so this leaves old w[3] ^ counter */
+	w[0] = skipjack_g_inv(w[1],k-1,key); /* recover old w[0] */
+	w[1] = w[2];
+	w[2] = w[3];
+	w[3] = t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT); /* strip the counter to get old w[3] */
+}
+
+/** Round function "b" on the four words w[0..3]: new w[1] = G(w[0]), new w[2] = old w[0] ^ old w[1] ^ counter. */
+
+void skipjack_b(uint16_t* w, uint8_t k, uint8_t* key){
+	uint16_t t;
+	t = w[0]; /* old w[0] is needed twice below */
+	w[0] = w[3];
+	w[3] = w[2];
+	w[2] = t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT) ^ w[1]; /* uses the old w[1], which is overwritten on the next line */
+	w[1] = skipjack_g(t,k-1,key); /* callers pass a 1-based k; G gets the 0-based round index */
+}
+
+/** Undoes skipjack_b() for the same k and key: restores the four words w[0..3] to their previous values. */
+
+void skipjack_b_inv(uint16_t* w, uint8_t k, uint8_t* key){
+	uint16_t t;
+	t = w[2]; /* holds old w[0] ^ old w[1] ^ counter */
+	w[2] = w[3];
+	w[3] = w[0];
+	w[0] = skipjack_g_inv(w[1],k-1,key); /* recover old w[0] */
+	w[1] = w[0] ^ t ^ (((uint16_t)k)SKIPJACK_CNT_SHIFT); /* old w[0] and the counter cancel, leaving old w[1] */
+}
+
+/*****************************************************************************/
+/**
+ * block is 64 bits (=8 bytes) in size, key is 80 bits (=10 bytes) in size.
+ * Encrypts block in place with 32 rounds: k=0..7 and 16..23 use skipjack_a, k=8..15 and 24..31 use skipjack_b.
+ */
+void skipjack_enc(void* block, void* key){
+	uint8_t k;
+	for(k=0; k<32; ++k){
+		if(k & 0x08){ /* bit 3 of k switches between the two round functions every 8 rounds */
+			skipjack_b((uint16_t*)block, k+1, key); /* round functions take a 1-based counter */
+		} else {
+			skipjack_a((uint16_t*)block, k+1, key);
+		}
+	}
+}
+
+/*****************************************************************************/
+/**
+ * block is 64 bits (=8 bytes) in size, key is 80 bits (=10 bytes) in size.
+ * Decrypts block in place: runs the 32 rounds of skipjack_enc() backwards (k=31..0) with the inverse round functions.
+ */
+void skipjack_dec(void* block, void* key){
+	int8_t k; /* signed, so that the k>=0 test below can become false */
+	for(k=31; k>=0; --k){
+		if(k & 0x08){ /* same a/b selection as in skipjack_enc() */
+			skipjack_b_inv((uint16_t*)block, k+1, key); /* round functions take a 1-based counter */
+		} else {
+			skipjack_a_inv((uint16_t*)block, k+1, key);
+		}
+	}
+}
+
+
+
+
+
diff --git a/skipjack/skipjack.h b/skipjack/skipjack.h
new file mode 100644
index 0000000..00f033d
--- /dev/null
+++ b/skipjack/skipjack.h
@@ -0,0 +1,56 @@
+/* skipjack.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef SKIPJACK_H_
+#define SKIPJACK_H_
+
+/** 
+ * \file      skipjack.h
+ * \author    Daniel Otte
+ * \date      2006-11-01
+ * \license   GPLv3 or later
+ * \brief     Interface of the Skipjack block cipher (encryption and decryption).
+ * 
+ */
+
+
+#include <stdint.h>
+
+/** \fn void skipjack_enc(void* block, void* key)
+ * \brief skipjack encryption function
+ * 
+ * This function encrypts a block of plaintext in place with the Skipjack
+ * encryption algorithm. The block is 64 bit (8 byte) long, the key is
+ * 80 bit (10 byte) long.
+ * \param block pointer to the 64 bit (8 byte) block to encrypt
+ * \param key   pointer to the 80 bit (10 byte) key 
+ */
+void skipjack_enc(void* block, void* key);
+
+/** \fn void skipjack_dec(void* block, void* key)
+ * \brief skipjack decryption function
+ * 
+ * This function decrypts, in place, a block of ciphertext encrypted with the
+ * Skipjack encryption algorithm. 
+ * The block is 64 bit (8 byte) long, the key is 80 bit (10 byte) long.
+ * \param block pointer to the 64 bit (8 byte) block to decrypt
+ * \param key   pointer to the 80 bit (10 byte) key 
+ */
+void skipjack_dec(void* block, void* key);
+
+#endif /*SKIPJACK_H_*/
diff --git a/test_src/main-hmac-md5-test.c b/test_src/main-hmac-md5-test.c
index dfbcead..f61e119 100644
--- a/test_src/main-hmac-md5-test.c
+++ b/test_src/main-hmac-md5-test.c
@@ -28,9 +28,10 @@
 
 #include "md5.h"
 #include "hmac-md5.h"
+/*
 #include "base64_enc.h"
 #include "base64_dec.h"
-
+*/
 #include "nessie_mac_test.h"
 
 #include <stdint.h>
@@ -109,6 +110,7 @@ void strhexdump(char* dest, void* src, uint16_t length){
 	}
 }
 
+/*
 void cram_md5_interactive(void){
 	char key[101];
 	char msg_b64[101];
@@ -137,9 +139,9 @@ void cram_md5_interactive(void){
 	cli_putstr_P(PSTR("\r\nresponse: "));
 	cli_hexdump(hmac, HMAC_MD5_BYTES);
 	cli_putstr_P(PSTR("\r\nresponse (b64): "));
-	cli_putstr(msg_b64);
-	
+	cli_putstr(msg_b64);	
 }
+*/
 
 
 void md5_interactive(void){
@@ -164,7 +166,7 @@ const char test_str[]        PROGMEM = "test";
 /* const char performance_str[] PROGMEM = "performance"; */
 const char echo_str[]        PROGMEM = "echo";
 const char hmd5i_str[]       PROGMEM = "hmac-md5";
-const char crammd5i_str[]    PROGMEM = "cram-md5";
+/* const char crammd5i_str[]    PROGMEM = "cram-md5"; */
 const char md5i_str[]        PROGMEM = "md5";
 
 
@@ -172,7 +174,7 @@ cmdlist_entry_t cmdlist[] PROGMEM = {
 	{ nessie_str,      NULL, testrun_nessie_hmacmd5},
 	{ test_str,        NULL, testrun_test_hmacmd5},
 	{ hmd5i_str,       NULL, hmacmd5_interactive},
-	{ crammd5i_str,    NULL, cram_md5_interactive},
+/*	{ crammd5i_str,    NULL, cram_md5_interactive},        */
 	{ md5i_str,        NULL, md5_interactive},
 /*	{ performance_str, NULL, testrun_performance_hmacmd5}, */
 	{ echo_str,    (void*)1, (void_fpt)echo_ctrl},
diff --git a/trivium.c b/trivium.c
deleted file mode 100644
index 3ac69c3..0000000
--- a/trivium.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* trivium.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/**
- * 
- * author: Daniel Otte
- * email:  daniel.otte@rub.de
- * license: GPLv3
- * 
- */
-
- 
-#include <stdint.h>
-#include <string.h>
-#include "trivium.h"
-
-#define S(i) ((((*ctx)[(i)/8])>>((i)%8))&1)
-uint8_t trivium_enc(trivium_ctx_t* ctx){
-	uint8_t t1,t2,t3,z;
-	
-	t1 = S(65)  ^ S(92);
-	t2 = S(161) ^ S(176);
-	t3 = S(242) ^ S(287);
-	z  = t1^t2^t3;
-	t1 ^= (S(90)  & S(91))  ^ S(170);
-	t2 ^= (S(174) & S(175)) ^ S(263);
-	t3 ^= (S(285) & S(286)) ^ S(68);
-	
-	/* shift whole state and insert ts later */
-	uint8_t i,c1=0,c2;
-	for(i=0; i<36; ++i){
-		c2=(((*ctx)[i])>>7);
-		(*ctx)[i] = (((*ctx)[i])<<1)|c1;
-		c1=c2;
-	}
-	/* insert ts */
-	(*ctx)[0] = (((*ctx)[0])&0xFE)| t3; /* s0*/
-	(*ctx)[93/8] = (((*ctx)[93/8])& (~(1<<(93%8)))) | (t1<<(93%8)); /* s93 */
-	(*ctx)[177/8] = (((*ctx)[177/8])& (~(1<<(177%8)))) | (t2<<(177%8));/* s177 */
-	
-	return z;
-}
-
-#define KEYSIZE_B ((keysize_b+7)/8)
-#define IVSIZE_B  ((ivsize_b +7)/8)
-
-void trivium_init(const void* key, uint8_t keysize_b, 
-                  const void* iv,  uint8_t ivsize_b,
-                  trivium_ctx_t* ctx){
-	uint16_t i;
-	uint8_t c1=0,c2;
-
-	memset((*ctx)+KEYSIZE_B, 0, 35-KEYSIZE_B);
-	memcpy((*ctx), key, KEYSIZE_B);
-	memcpy((*ctx)+12, iv, IVSIZE_B); /* iv0 is at s96, must shift to s93 */
-	
-	for(i=12+IVSIZE_B; i>10; --i){
-		c2=(((*ctx)[i])<<5);
-		(*ctx)[i] = (((*ctx)[i])>>3)|c1;
-		c1=c2;
-	}
-	(*ctx)[35]=0xE0;
-	
-	for(i=0; i<4*288; ++i){
-		trivium_enc(ctx);
-	}
-}
-
-
diff --git a/trivium.h b/trivium.h
deleted file mode 100644
index 14a005f..0000000
--- a/trivium.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* trivium.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-#ifndef TRIVIUM_H_
-#define TRIVIUM_H_
-
-typedef uint8_t trivium_ctx_t[36]; /* 288bit */
-
-uint8_t trivium_enc(trivium_ctx_t* ctx);
-void trivium_init(const void* key, uint8_t keysize_b, 
-                  const void* iv,  uint8_t ivsize_b,
-                  trivium_ctx_t* ctx);
-
-#endif /*TRIVIUM_H_*/
diff --git a/trivium/trivium.c b/trivium/trivium.c
new file mode 100644
index 0000000..3ac69c3
--- /dev/null
+++ b/trivium/trivium.c
@@ -0,0 +1,84 @@
+/* trivium.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * Trivium implementation: one-bit state clocking (trivium_enc) and key/IV setup (trivium_init).
+ * author: Daniel Otte
+ * email:  daniel.otte@rub.de
+ * license: GPLv3
+ * 
+ */
+
+ 
+#include <stdint.h>
+#include <string.h>
+#include "trivium.h"
+
+#define S(i) ((((*ctx)[(i)/8])>>((i)%8))&1) /* state bit i: bit (i%8) of byte (i/8) of *ctx */
+uint8_t trivium_enc(trivium_ctx_t* ctx){ /* clocks the state once; returns the output bit z (0 or 1) */
+	uint8_t t1,t2,t3,z;
+	
+	t1 = S(65)  ^ S(92);
+	t2 = S(161) ^ S(176);
+	t3 = S(242) ^ S(287);
+	z  = t1^t2^t3; /* output bit, taken before the feedback terms are added */
+	t1 ^= (S(90)  & S(91))  ^ S(170);
+	t2 ^= (S(174) & S(175)) ^ S(263);
+	t3 ^= (S(285) & S(286)) ^ S(68);
+	
+	/* shift the whole 36 byte state up by one bit (c1 carries bit 7 into the next byte); the ts are inserted afterwards */
+	uint8_t i,c1=0,c2;
+	for(i=0; i<36; ++i){
+		c2=(((*ctx)[i])>>7);
+		(*ctx)[i] = (((*ctx)[i])<<1)|c1;
+		c1=c2;
+	}
+	/* insert ts: overwrite the bits at positions 0, 93 and 177 of the shifted state */
+	(*ctx)[0] = (((*ctx)[0])&0xFE)| t3; /* s0*/
+	(*ctx)[93/8] = (((*ctx)[93/8])& (~(1<<(93%8)))) | (t1<<(93%8)); /* s93 */
+	(*ctx)[177/8] = (((*ctx)[177/8])& (~(1<<(177%8)))) | (t2<<(177%8));/* s177 */
+	
+	return z;
+}
+
+#define KEYSIZE_B ((keysize_b+7)/8) /* key length in bytes, rounded up from the bit count keysize_b */
+#define IVSIZE_B  ((ivsize_b +7)/8) /* IV length in bytes, rounded up from the bit count ivsize_b */
+
+void trivium_init(const void* key, uint8_t keysize_b, 
+                  const void* iv,  uint8_t ivsize_b,
+                  trivium_ctx_t* ctx){ /* loads key and IV into *ctx, then clocks the state 4*288 times */
+	uint16_t i;
+	uint8_t c1=0,c2;
+
+	memset((*ctx)+KEYSIZE_B, 0, 35-KEYSIZE_B); /* zero bytes KEYSIZE_B..34; byte 35 is set below. NOTE(review): sizes are not range-checked */
+	memcpy((*ctx), key, KEYSIZE_B); /* key occupies the state from s0 upwards */
+	memcpy((*ctx)+12, iv, IVSIZE_B); /* iv0 is at s96, must shift to s93 */
+	
+	for(i=12+IVSIZE_B; i>10; --i){ /* shift bytes 11..12+IVSIZE_B right by 3 bits, highest byte first */
+		c2=(((*ctx)[i])<<5); /* low 3 bits of this byte, moved to the top for the next lower byte */
+		(*ctx)[i] = (((*ctx)[i])>>3)|c1;
+		c1=c2;
+	}
+	(*ctx)[35]=0xE0; /* sets the three highest state bits, s285..s287 */
+	
+	for(i=0; i<4*288; ++i){ /* 1152 warm-up clocks; the returned bits are discarded */
+		trivium_enc(ctx);
+	}
+}
+
+
diff --git a/trivium/trivium.h b/trivium/trivium.h
new file mode 100644
index 0000000..14a005f
--- /dev/null
+++ b/trivium/trivium.h
@@ -0,0 +1,29 @@
+/* trivium.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef TRIVIUM_H_
+#define TRIVIUM_H_
+
+typedef uint8_t trivium_ctx_t[36]; /* 288 bit state; NOTE(review): this header uses uint8_t but does not include <stdint.h> itself */
+
+uint8_t trivium_enc(trivium_ctx_t* ctx); /* clocks the state once and returns one output bit (0 or 1) */
+void trivium_init(const void* key, uint8_t keysize_b, 
+                  const void* iv,  uint8_t ivsize_b,
+                  trivium_ctx_t* ctx); /* keysize_b and ivsize_b are lengths in bits; ctx receives the initialised state */
+
+#endif /*TRIVIUM_H_*/
diff --git a/xtea-asm.S b/xtea-asm.S
deleted file mode 100644
index 826f123..0000000
--- a/xtea-asm.S
+++ /dev/null
@@ -1,585 +0,0 @@
-/* xtea-asm.S */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/* xtea-asm.S 
- * Author:      Daniel Otte
- * Date:        2006-06-06
- * License:     GPLv3 or later
- *  Implementation of XTEA for AVR
- *  include xtea.h in your C-Project to use this functions.
-*/
-
-V01 = 2
-V02 = 3
-V03 = 4
-V04 = 5
-V11 = 6
-V12 = 7
-V13 = 8
-V14 = 9
-Accu1 = 14
-Accu2 = 15
-Accu3 = 16
-Accu4 = 17
-Sum1 = 18
-Sum2 = 19
-Sum3 = 20
-Sum4 = 21
-Func1 = 22
-Func2 = 23
-Func3 = 24
-Func4 = 25
-C = 28 /* der kleine Zaehler fuer zwischendurch */
-
-.global xtea_enc
-; == xtea_enc ==
-; xtea encrytion function
-; param1: 16-bit pointer to destination for encrypted block 
-;  given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to encrypt 
-;  given in r23,r22
-; param3: 16-bit pointer to the key (128-bit) 
-;  given in r21,r20
-;
-xtea_enc:
- /* prolog */
- 	push r2
- 	push r3
- 	push r4
- 	push r5
- 	push r6
- 	push r7
- 	push r8
- 	push r9
- 	push r14
- 	push r15
- 	push r16
- 	push r17
- 	push r28
- 	
- /* load the block */
- 	movw r26, r22 /* X points to block */
- 	movw r30, r20 /* Z points to key   */
- 	ld V01, X+
- 	ld V02, X+
- 	ld V03, X+
- 	ld V04, X+
- 	ld V11, X+
- 	ld V12, X+
- 	ld V13, X+
- 	ld V14, X+
-; 	push r25
-; 	push r24
- 	movw r26, r24 /* X points to destination */
- 
-	ldi Func1, 32
-	mov r0, Func1 /* r0 is cycle-counter */
-	clr Sum1
-	clr Sum2
-	movw Sum3, Sum1
-	clt
-
-1:
-	movw Accu1, V11
-	movw Accu3, V13
-	ldi C, 4
-2:	lsl Accu1
-	rol Accu2
-	rol Accu3
-	rol Accu4
-	dec C
-	brne 2b			/* Accu == V1 << 4 */
-
-	movw Func1, V11
-	movw Func3, V13
-	ldi C, 5
-3:	lsr Func4
-	ror Func3
-	ror Func2
-	ror Func1
-	dec C
-	brne 3b			/* Func == V1 >> 5 */
-	
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4
-	add Accu1, V11
-	adc Accu2, V12
-	adc Accu3, V13
-	adc Accu4, V14	/* Accu == ( (V1<<4)^(V1>>5) ) + V1 */
-	
-	brtc 4f
-	mov C, Sum2
-	lsr C
-	andi C,(0x03 <<2)
-	clt
-	rjmp 5f
-4:	
-	mov C, Sum1	/* calc key offset */
-	andi C, 0x03
-	lsl C
-	lsl C
-	set
-	
-5:	
-	add r30, C
-	adc r31, r1
-	ld  Func1, Z
-	ldd Func2, Z+1
-	ldd Func3, Z+2
-	ldd Func4, Z+3 /* Func = key[sum & 3] */
-	sub r30, C
-	sbci r31, 0
-	add Func1, Sum1
-	adc Func2, Sum2
-	adc Func3, Sum3
-	adc Func4, Sum4 
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4 /* Accu = ((V1<<4 ^ V1>>5) + V1) ^ (sum + key[sum&3])  */
-	add Accu1, V01
-	adc Accu2, V02
-	adc Accu3, V03
-	adc Accu4, V04
-	
-	movw V01, V11
-	movw V03, V13
-	movw V11, Accu1
-	movw V13, Accu3
-	
-	/* sum += delta */ /* delta == 0x9E3779B9 */
-	brtc 6f
-	ldi C, 0xB9
-	add Sum1, C
-	ldi C, 0x79
-	adc Sum2, C
-	ldi C, 0x37
-	adc Sum3, C
-	ldi C, 0x9E
-	adc Sum4, C
-	rjmp 1b
-	
-6:	
-	dec r0
-	breq 7f
-	rjmp 1b 
- 
- 7:
- /* write block back */
- ;	pop r26
- ;	pop r27
- 	st X+, V01
- 	st X+, V02
- 	st X+, V03
-  	st X+, V04
- 	st X+, V11
- 	st X+, V12
- 	st X+, V13
- 	st X+, V14
- 
- /* epilog */
- 	pop r28
- 	pop r17
- 	pop r16
- 	pop r15
- 	pop r14
- 	pop r9
- 	pop r8
- 	pop r7
- 	pop r6
- 	pop r5
- 	pop r4
- 	pop r3
- 	pop r2
- 	ret
-
-;####################################################################
- 
- /* #endif TWO_IN_ONE */	
- 
- /* #ifdef TWO_IN_ONE */
- /* now we use the same base-structure for enc- and decryption
- 	to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
- 	this is ok, since even the larges atmel today has "only" 8k of ram,
- 	but you shouldn't use this feature while using external ram. 
- */
-.global xtea_enc
- 	ori r21, 0x80
- 	
-.global xtea_dec
-; == xtea_dec ==
-; xtea decrytion function
-; param1: 16-bit pointer to destination for decrypted block 
-;  given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to derypt 
-;  given in r23,r22
-; param3: 16-bit pointer to the key (128-bit) 
-;  given in r21,r20
-;
-/*
-void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
-    uint32_t v0=v[0], v1=v[1], i;
-    uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
-    for(i=0; i<32; i++) {
-        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
-        sum -= delta;
-        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
-    }
-    dest[0]=v0; dest[1]=v1;
-}
-*/
-
-xtea_dec:
- /* prolog */
- 	push r2
- 	push r3
- 	push r4
- 	push r5
- 	push r6
- 	push r7
- 	push r8
- 	push r9
- 	push r14
- 	push r15
- 	push r16
- 	push r17
- 	push r28 
- /* load the block */
- 	movw r26, r22 /* Z points to block */
- 	movw r30, r20 /* X points to key   */
- 	ld V01, X+
- 	ld V02, X+
- 	ld V03, X+
- 	ld V04, X+
- 	ld V11, X+
- 	ld V12, X+
- 	ld V13, X+
- 	ld V14, X+
- 	movw r26, r24 /* Z points to destination */
- 
-	ldi Sum1, 32
-	mov r0, Sum1 /* r1 is cycle-counter */
-	ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
-	ldi Sum2, 0x37
-	ldi Sum3, 0xEF
-	ldi Sum4, 0xC6
-	clt
-
-1:
-	movw Accu1, V01
-	movw Accu3, V03
-	ldi C, 4
-2:	lsl Accu1
-	rol Accu2
-	rol Accu3
-	rol Accu4
-	dec C
-	brne 2b			/* Accu == V0 << 4 */
-
-	movw Func1, V01
-	movw Func3, V03
-	ldi C, 5
-3:	lsr Func4
-	ror Func3
-	ror Func2
-	ror Func1
-	dec C
-	brne 3b			/* Func == V0 >> 5 */
-	
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4
-	add Accu1, V01
-	adc Accu2, V02
-	adc Accu3, V03
-	adc Accu4, V04	/* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
-	
-	brts 4f
-	mov C, Sum2
-	lsr C
-	andi C,(0x03 <<2)
-	set
-	rjmp 5f
-4:	
-	mov C, Sum1	/* calc key offset */
-	andi C, 0x03
-	lsl C
-	lsl C
-	clt
-	
-5:	
-	add r30, C
-	adc r31, r1
-	ld  Func1, Z
-	ldd Func2, Z+1
-	ldd Func3, Z+2
-	ldd Func4, Z+3 /* Func = key[sum & 3] */
-	sub r30, C
-	sbci r31, 0
-	add Func1, Sum1
-	adc Func2, Sum2
-	adc Func3, Sum3
-	adc Func4, Sum4 
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3])  */
-	sub V11, Accu1
-	sbc V12, Accu2
-	sbc V13, Accu3
-	sbc V14, Accu4
-	
-	movw Accu1, V01
-	movw Accu3, V03
-	movw V01, V11
-	movw V03, V13
-	movw V11, Accu1
-	movw V13, Accu3
-	
-	/* sum += delta */ /* delta == 0x9E3779B9 */
-	brtc 6f
-	subi Sum1, 0xB9
-	sbci Sum2, 0x79
-	sbci Sum3, 0x37
-	sbci Sum4, 0x9E
-	rjmp 1b
-	
-6:	
-	dec r0
-	breq 7f
-	rjmp 1b 
- 
-7:
- /* write block back */
- 	st X+, V01
- 	st X+, V02
- 	st X+, V03
-  	st X+, V04
- 	st X+, V11
- 	st X+, V12
- 	st X+, V13
- 	st X+, V14
- 
- /* epilog */
- 	pop r28
- 	pop r17
- 	pop r16
- 	pop r15
- 	pop r14
- 	pop r9
- 	pop r8
- 	pop r7
- 	pop r6
- 	pop r5
- 	pop r4
- 	pop r3
- 	pop r2
- 	ret
- 	
- /* #endif */
-
-;####################################################################
- 
- #ifdef TWO_IN_ONE
- /* now we use the same base-structure for enc- and decryption
- 	to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
- 	this is ok, since even the larges atmel today has "only" 8k of ram,
- 	but you shouldn't use this feature while using external ram. 
- */
-.global xtea_enc
- 	ori r21, 0x80
- 	
-.global xtea_dec
-; == xtea_dec ==
-; xtea decrytion function
-; param1: 16-bit pointer to destination for decrypted block 
-;  given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to derypt 
-;  given in r23,r22
-; param3: 16-bit pointer to the key (128-bit) 
-;  given in r21,r20
-;
-/*
-void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
-    uint32_t v0=v[0], v1=v[1], i;
-    uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
-    for(i=0; i<32; i++) {
-        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
-        sum -= delta;
-        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
-    }
-    dest[0]=v0; dest[1]=v1;
-}
-*/
-
-xtea_dec:
- /* prolog */
- 	push r2
- 	push r3
- 	push r4
- 	push r5
- 	push r6
- 	push r7
- 	push r8
- 	push r9
- 	push r14
- 	push r15
- 	push r16
- 	push r17
- 	push r28 
- /* set T-bit if we are going to encrypt, clear otherwise */
- 	bst r21, 7
- 	andi r21, 0x7f /* fix r21:r22 to a real addr */
- /* load the block */
- 	movw r26, r22 /* Z points to block */
- 	movw r30, r20 /* X points to key   */
- 	ld V01, X+
- 	ld V02, X+
- 	ld V03, X+
- 	ld V04, X+
- 	ld V11, X+
- 	ld V12, X+
- 	ld V13, X+
- 	ld V14, X+
- 	movw r26, r24 /* Z points to destination */
- 
-	ldi Sum1, 32
-	mov r0, Sum1 /* r1 is cycle-counter */
-	ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
-	ldi Sum2, 0x37
-	ldi Sum3, 0xEF
-	ldi Sum4, 0xC6
-	clt
-
-1:
-	movw Accu1, V01
-	movw Accu3, V03
-	ldi C, 4
-2:	lsl Accu1
-	rol Accu2
-	rol Accu3
-	rol Accu4
-	dec C
-	brne 2b			/* Accu == V0 << 4 */
-
-	movw Func1, V01
-	movw Func3, V03
-	ldi C, 5
-3:	lsr Func4
-	ror Func3
-	ror Func2
-	ror Func1
-	dec C
-	brne 3b			/* Func == V0 >> 5 */
-	
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4
-	add Accu1, V01
-	adc Accu2, V02
-	adc Accu3, V03
-	adc Accu4, V04	/* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
-	
-	brts 4f
-	mov C, Sum2
-	lsr C
-	andi C,(0x03 <<2)
-	set
-	rjmp 5f
-4:	
-	mov C, Sum1	/* calc key offset */
-	andi C, 0x03
-	lsl C
-	lsl C
-	clt
-	
-5:	
-	add r30, C
-	adc r31, r1
-	ld  Func1, Z
-	ldd Func2, Z+1
-	ldd Func3, Z+2
-	ldd Func4, Z+3 /* Func = key[sum & 3] */
-	sub r30, C
-	sbci r31, 0
-	add Func1, Sum1
-	adc Func2, Sum2
-	adc Func3, Sum3
-	adc Func4, Sum4 
-	eor Accu1, Func1
-	eor Accu2, Func2
-	eor Accu3, Func3
-	eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3])  */
-	sub V11, Accu1
-	sbc V12, Accu2
-	sbc V13, Accu3
-	sbc V14, Accu4
-	
-	movw Accu1, V01
-	movw Accu3, V03
-	movw V01, V11
-	movw V03, V13
-	movw V11, Accu1
-	movw V13, Accu3
-	
-	/* sum += delta */ /* delta == 0x9E3779B9 */
-	brtc 6f
-	subi Sum1, 0xB9
-	sbci Sum2, 0x79
-	sbci Sum3, 0x37
-	sbci Sum4, 0x9E
-	rjmp 1b
-	
-6:	
-	dec r0
-	breq 7f
-	rjmp 1b 
- 
-7:
- /* write block back */
- 	st X+, V01
- 	st X+, V02
- 	st X+, V03
-  	st X+, V04
- 	st X+, V11
- 	st X+, V12
- 	st X+, V13
- 	st X+, V14
- 
- /* epilog */
- 	pop r28
- 	pop r17
- 	pop r16
- 	pop r15
- 	pop r14
- 	pop r9
- 	pop r8
- 	pop r7
- 	pop r6
- 	pop r5
- 	pop r4
- 	pop r3
- 	pop r2
- 	ret
- 	
- #endif
-
diff --git a/xtea.c b/xtea.c
deleted file mode 100644
index 4605cb0..0000000
--- a/xtea.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* xtea.c */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/** 
- * \file	xtea.c
- * \brief	XTEA implemantation 
- *   copy'n'pasted from http://en.wikipedia.org/wiki/XTEA
- *   and slightly modified
- */
- 
-#include <stdint.h> 
- 
-
-void xtea_enc(void* dest, const void* v, const void* k) {
-    uint8_t i;
-    uint32_t v0=((uint32_t*)v)[0], v1=((uint32_t*)v)[1];
-    uint32_t sum=0, delta=0x9E3779B9;
-    for(i=0; i<32; i++) {
-        v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + ((uint32_t*)k)[sum & 3]);
-        sum += delta;
-        v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + ((uint32_t*)k)[sum>>11 & 3]);
-    }
-    ((uint32_t*)dest)[0]=v0; ((uint32_t*)dest)[1]=v1;
-}
-
-void xtea_dec(void* dest, const void* v, const void* k) {
-    uint8_t i;
-    uint32_t v0=((uint32_t*)v)[0], v1=((uint32_t*)v)[1];
-    uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
-    for(i=0; i<32; i++) {
-        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + ((uint32_t*)k)[sum>>11 & 3]);
-        sum -= delta;
-        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + ((uint32_t*)k)[sum & 3]);
-    }
-    ((uint32_t*)dest)[0]=v0; ((uint32_t*)dest)[1]=v1;
-}
-
-
diff --git a/xtea.h b/xtea.h
deleted file mode 100644
index b753bf7..0000000
--- a/xtea.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* xtea.h */
-/*
-    This file is part of the AVR-Crypto-Lib.
-    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-/*
- * Author:	Daniel Otte
- * Date:		06.06.2006
- * License:	GPL
- */
-
-#ifndef XTEA_H_
-#define XTEA_H_
-
-#include <stdint.h> 
- 
-/*
- * this fits for xtea.c and xtea-asm.S
- * 
- */
-#define XTEA_BLOCKSIZE 64
-#define XTEA_BLOCKSIZEB ((XTEA_BLOCKSIZE+7)/8)
-#define XTEA_KEYSIZE 128
-#define XTEA_KEYSIZEB ((XTEA_KEYSIZE+7)/8)
-
-
-/*
- * dest: the destination where result of operation will be placed (64 bit)
- * v:	 the block to operate on (64 bit)
- * k:	 the key for en/decryption (128 bit)
- */
-void xtea_enc(void* dest, const void* v, const void* k);
-void xtea_dec(void* dest, const void* v, const void* k);
-
-
-#endif /*XTEA_H_*/
diff --git a/xtea/xtea-asm.S b/xtea/xtea-asm.S
new file mode 100644
index 0000000..826f123
--- /dev/null
+++ b/xtea/xtea-asm.S
@@ -0,0 +1,585 @@
+/* xtea-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/* xtea-asm.S 
+ * Author:      Daniel Otte
+ * Date:        2006-06-06
+ * License:     GPLv3 or later
+ *  Implementation of XTEA for AVR
+ *  Include xtea.h in your C project to use these functions.
+*/
+
+V01 = 2 /* V01..V04: first 32-bit word of the block, V01 is the least significant byte */
+V02 = 3
+V03 = 4
+V04 = 5
+V11 = 6 /* V11..V14: second 32-bit word of the block, V11 is the least significant byte */
+V12 = 7
+V13 = 8
+V14 = 9
+Accu1 = 14 /* Accu1..Accu4: 32-bit accumulator for the round function */
+Accu2 = 15
+Accu3 = 16
+Accu4 = 17
+Sum1 = 18 /* Sum1..Sum4: 32-bit running sum, Sum1 is the least significant byte */
+Sum2 = 19
+Sum3 = 20
+Sum4 = 21
+Func1 = 22 /* Func1..Func4: 32-bit temporary (shifted word, then sum + key word) */
+Func2 = 23
+Func3 = 24
+Func4 = 25
+C = 28 /* small scratch counter for intermediate use (shift loops, key offset) */
+
+.global xtea_enc
+; == xtea_enc ==
+; XTEA encryption function
+; param1: 16-bit pointer to destination for encrypted block 
+;  given in r25,r24
+; param2: 16-bit pointer to the 64-bit block which is to be encrypted 
+;  given in r23,r22
+; param3: 16-bit pointer to the key (128-bit) 
+;  given in r21,r20
+;
+xtea_enc:
+ /* prolog: save the registers that are restored in the epilog */
+ 	push r2
+ 	push r3
+ 	push r4
+ 	push r5
+ 	push r6
+ 	push r7
+ 	push r8
+ 	push r9
+ 	push r14
+ 	push r15
+ 	push r16
+ 	push r17
+ 	push r28
+ 	
+ /* load the block */
+ 	movw r26, r22 /* X points to block */
+ 	movw r30, r20 /* Z points to key   */
+ 	ld V01, X+
+ 	ld V02, X+
+ 	ld V03, X+
+ 	ld V04, X+
+ 	ld V11, X+
+ 	ld V12, X+
+ 	ld V13, X+
+ 	ld V14, X+
+; 	push r25
+; 	push r24
+ 	movw r26, r24 /* X points to destination */
+ 
+	ldi Func1, 32
+	mov r0, Func1 /* r0 is cycle-counter (32 full rounds) */
+	clr Sum1
+	clr Sum2
+	movw Sum3, Sum1 /* sum = 0 */
+	clt /* T clear: the first half-round comes next */
+
+1:
+	movw Accu1, V11
+	movw Accu3, V13
+	ldi C, 4
+2:	lsl Accu1
+	rol Accu2
+	rol Accu3
+	rol Accu4
+	dec C
+	brne 2b			/* Accu == V1 << 4 */
+
+	movw Func1, V11
+	movw Func3, V13
+	ldi C, 5
+3:	lsr Func4
+	ror Func3
+	ror Func2
+	ror Func1
+	dec C
+	brne 3b			/* Func == V1 >> 5 */
+	
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4
+	add Accu1, V11
+	adc Accu2, V12
+	adc Accu3, V13
+	adc Accu4, V14	/* Accu == ( (V1<<4)^(V1>>5) ) + V1 */
+	
+	brtc 4f
+	mov C, Sum2 /* T set (second half-round): byte offset of key[(sum>>11) & 3] */
+	lsr C
+	andi C,(0x03 <<2)
+	clt
+	rjmp 5f
+4:	
+	mov C, Sum1	/* T clear (first half-round): byte offset of key[sum & 3] */
+	andi C, 0x03
+	lsl C
+	lsl C
+	set
+	
+5:	
+	add r30, C
+	adc r31, r1 /* NOTE(review): relies on r1 == 0 (avr-gcc zero register) -- confirm for non-C callers */
+	ld  Func1, Z
+	ldd Func2, Z+1
+	ldd Func3, Z+2
+	ldd Func4, Z+3 /* Func = key[sum & 3] or key[(sum>>11) & 3], depending on the half-round */
+	sub r30, C
+	sbci r31, 0
+	add Func1, Sum1
+	adc Func2, Sum2
+	adc Func3, Sum3
+	adc Func4, Sum4 
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu = ((V1<<4 ^ V1>>5) + V1) ^ (sum + selected key word)  */
+	add Accu1, V01
+	adc Accu2, V02
+	adc Accu3, V03
+	adc Accu4, V04
+	
+	movw V01, V11 /* swap the halves so the next half-round updates the other word */
+	movw V03, V13
+	movw V11, Accu1
+	movw V13, Accu3
+	
+	/* sum += delta (only between the two half-rounds, i.e. while T is set) */ /* delta == 0x9E3779B9 */
+	brtc 6f
+	ldi C, 0xB9
+	add Sum1, C
+	ldi C, 0x79
+	adc Sum2, C
+	ldi C, 0x37
+	adc Sum3, C
+	ldi C, 0x9E
+	adc Sum4, C
+	rjmp 1b
+	
+6:	
+	dec r0
+	breq 7f
+	rjmp 1b 
+ 
+ 7:
+ /* write block back to the destination (X) */
+ ;	pop r26
+ ;	pop r27
+ 	st X+, V01
+ 	st X+, V02
+ 	st X+, V03
+  	st X+, V04
+ 	st X+, V11
+ 	st X+, V12
+ 	st X+, V13
+ 	st X+, V14
+ 
+ /* epilog: restore the saved registers in reverse order */
+ 	pop r28
+ 	pop r17
+ 	pop r16
+ 	pop r15
+ 	pop r14
+ 	pop r9
+ 	pop r8
+ 	pop r7
+ 	pop r6
+ 	pop r5
+ 	pop r4
+ 	pop r3
+ 	pop r2
+ 	ret
+
+;####################################################################
+ 
+ /* #endif TWO_IN_ONE */	
+ 
+ /* #ifdef TWO_IN_ONE */
+ /* now we use the same base-structure for enc- and decryption
+ 	to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
+ 	this is ok, since even the largest atmel today has "only" 8k of ram,
+ 	but you shouldn't use this feature while using external ram. 
+ */
+.global xtea_enc
+ 	ori r21, 0x80 /* NOTE(review): no label precedes this instruction and it follows the ret of xtea_enc, so it looks like an unreachable TWO_IN_ONE leftover -- confirm */
+ 	
+.global xtea_dec
+; == xtea_dec ==
+; XTEA decryption function
+; param1: 16-bit pointer to destination for decrypted block 
+;  given in r25,r24
+; param2: 16-bit pointer to the 64-bit block which is to be decrypted 
+;  given in r23,r22
+; param3: 16-bit pointer to the key (128-bit) 
+;  given in r21,r20
+;
+/*
+void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
+    uint32_t v0=v[0], v1=v[1], i;
+    uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
+    for(i=0; i<32; i++) {
+        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
+        sum -= delta;
+        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
+    }
+    dest[0]=v0; dest[1]=v1;
+}
+*/
+
+xtea_dec:
+ /* prolog: save the registers that are restored in the epilog */
+ 	push r2
+ 	push r3
+ 	push r4
+ 	push r5
+ 	push r6
+ 	push r7
+ 	push r8
+ 	push r9
+ 	push r14
+ 	push r15
+ 	push r16
+ 	push r17
+ 	push r28 
+ /* load the block */
+ 	movw r26, r22 /* X points to block */
+ 	movw r30, r20 /* Z points to key   */
+ 	ld V01, X+
+ 	ld V02, X+
+ 	ld V03, X+
+ 	ld V04, X+
+ 	ld V11, X+
+ 	ld V12, X+
+ 	ld V13, X+
+ 	ld V14, X+
+ 	movw r26, r24 /* X points to destination */
+ 
+	ldi Sum1, 32
+	mov r0, Sum1 /* r0 is cycle-counter (32 full rounds) */
+	ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
+	ldi Sum2, 0x37
+	ldi Sum3, 0xEF
+	ldi Sum4, 0xC6
+	clt /* T clear: the first half-round (the one that updates v1) comes next */
+
+1:
+	movw Accu1, V01
+	movw Accu3, V03
+	ldi C, 4
+2:	lsl Accu1
+	rol Accu2
+	rol Accu3
+	rol Accu4
+	dec C
+	brne 2b			/* Accu == V0 << 4 */
+
+	movw Func1, V01
+	movw Func3, V03
+	ldi C, 5
+3:	lsr Func4
+	ror Func3
+	ror Func2
+	ror Func1
+	dec C
+	brne 3b			/* Func == V0 >> 5 */
+	
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4
+	add Accu1, V01
+	adc Accu2, V02
+	adc Accu3, V03
+	adc Accu4, V04	/* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
+	
+	brts 4f
+	mov C, Sum2 /* T clear (first half-round): byte offset of key[(sum>>11) & 3] */
+	lsr C
+	andi C,(0x03 <<2)
+	set
+	rjmp 5f
+4:	
+	mov C, Sum1	/* T set (second half-round): byte offset of key[sum & 3] */
+	andi C, 0x03
+	lsl C
+	lsl C
+	clt
+	
+5:	
+	add r30, C
+	adc r31, r1 /* NOTE(review): relies on r1 == 0 (avr-gcc zero register) -- confirm for non-C callers */
+	ld  Func1, Z
+	ldd Func2, Z+1
+	ldd Func3, Z+2
+	ldd Func4, Z+3 /* Func = key[(sum>>11) & 3] or key[sum & 3], depending on the half-round */
+	sub r30, C
+	sbci r31, 0
+	add Func1, Sum1
+	adc Func2, Sum2
+	adc Func3, Sum3
+	adc Func4, Sum4 
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + selected key word)  */
+	sub V11, Accu1
+	sbc V12, Accu2
+	sbc V13, Accu3
+	sbc V14, Accu4
+	
+	movw Accu1, V01 /* swap the two halves so the next half-round updates the other word */
+	movw Accu3, V03
+	movw V01, V11
+	movw V03, V13
+	movw V11, Accu1
+	movw V13, Accu3
+	
+	/* sum -= delta (only between the two half-rounds, i.e. while T is set) */ /* delta == 0x9E3779B9 */
+	brtc 6f
+	subi Sum1, 0xB9
+	sbci Sum2, 0x79
+	sbci Sum3, 0x37
+	sbci Sum4, 0x9E
+	rjmp 1b
+	
+6:	
+	dec r0
+	breq 7f
+	rjmp 1b 
+ 
+7:
+ /* write block back to the destination (X) */
+ 	st X+, V01
+ 	st X+, V02
+ 	st X+, V03
+  	st X+, V04
+ 	st X+, V11
+ 	st X+, V12
+ 	st X+, V13
+ 	st X+, V14
+ 
+ /* epilog: restore the saved registers in reverse order */
+ 	pop r28
+ 	pop r17
+ 	pop r16
+ 	pop r15
+ 	pop r14
+ 	pop r9
+ 	pop r8
+ 	pop r7
+ 	pop r6
+ 	pop r5
+ 	pop r4
+ 	pop r3
+ 	pop r2
+ 	ret
+ 	
+ /* #endif */
+
+;####################################################################
+ 
+ #ifdef TWO_IN_ONE
+ /* now we use the same base-structure for enc- and decryption
+ 	to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
+ 	this is ok, since even the largest atmel today has "only" 8k of ram,
+ 	but you shouldn't use this feature while using external ram. 
+ */
+.global xtea_enc
+ 	ori r21, 0x80 /* NOTE(review): no "xtea_enc:" label precedes this instruction in this section -- confirm how it is meant to be entered */
+ 	
+.global xtea_dec
+; == xtea_dec ==
+; XTEA decryption function
+; param1: 16-bit pointer to destination for decrypted block 
+;  given in r25,r24
+; param2: 16-bit pointer to the 64-bit block which is to be decrypted 
+;  given in r23,r22
+; param3: 16-bit pointer to the key (128-bit) 
+;  given in r21,r20
+; NOTE(review): xtea_dec is also defined unconditionally earlier in this file, so enabling TWO_IN_ONE would define the label twice.
+/*
+void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
+    uint32_t v0=v[0], v1=v[1], i;
+    uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
+    for(i=0; i<32; i++) {
+        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
+        sum -= delta;
+        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
+    }
+    dest[0]=v0; dest[1]=v1;
+}
+*/
+
+xtea_dec:
+ /* prolog: save the registers that are restored in the epilog */
+ 	push r2
+ 	push r3
+ 	push r4
+ 	push r5
+ 	push r6
+ 	push r7
+ 	push r8
+ 	push r9
+ 	push r14
+ 	push r15
+ 	push r16
+ 	push r17
+ 	push r28 
+ /* set T-bit if we are going to encrypt, clear otherwise */
+ 	bst r21, 7
+ 	andi r21, 0x7f /* clear the mode bit so r21:r20 is a real address again */
+ /* load the block */
+ 	movw r26, r22 /* X points to block */
+ 	movw r30, r20 /* Z points to key   */
+ 	ld V01, X+
+ 	ld V02, X+
+ 	ld V03, X+
+ 	ld V04, X+
+ 	ld V11, X+
+ 	ld V12, X+
+ 	ld V13, X+
+ 	ld V14, X+
+ 	movw r26, r24 /* X points to destination */
+ 
+	ldi Sum1, 32
+	mov r0, Sum1 /* r0 is cycle-counter (32 full rounds) */
+	ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
+	ldi Sum2, 0x37
+	ldi Sum3, 0xEF
+	ldi Sum4, 0xC6
+	clt /* NOTE(review): this clears the T flag written by bst above, so the enc/dec mode bit is lost; T is reused below as the half-round toggle */
+
+1:
+	movw Accu1, V01
+	movw Accu3, V03
+	ldi C, 4
+2:	lsl Accu1
+	rol Accu2
+	rol Accu3
+	rol Accu4
+	dec C
+	brne 2b			/* Accu == V0 << 4 */
+
+	movw Func1, V01
+	movw Func3, V03
+	ldi C, 5
+3:	lsr Func4
+	ror Func3
+	ror Func2
+	ror Func1
+	dec C
+	brne 3b			/* Func == V0 >> 5 */
+	
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4
+	add Accu1, V01
+	adc Accu2, V02
+	adc Accu3, V03
+	adc Accu4, V04	/* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
+	
+	brts 4f
+	mov C, Sum2 /* T clear (first half-round): byte offset of key[(sum>>11) & 3] */
+	lsr C
+	andi C,(0x03 <<2)
+	set
+	rjmp 5f
+4:	
+	mov C, Sum1	/* T set (second half-round): byte offset of key[sum & 3] */
+	andi C, 0x03
+	lsl C
+	lsl C
+	clt
+	
+5:	
+	add r30, C
+	adc r31, r1 /* NOTE(review): relies on r1 == 0 (avr-gcc zero register) -- confirm for non-C callers */
+	ld  Func1, Z
+	ldd Func2, Z+1
+	ldd Func3, Z+2
+	ldd Func4, Z+3 /* Func = key[(sum>>11) & 3] or key[sum & 3], depending on the half-round */
+	sub r30, C
+	sbci r31, 0
+	add Func1, Sum1
+	adc Func2, Sum2
+	adc Func3, Sum3
+	adc Func4, Sum4 
+	eor Accu1, Func1
+	eor Accu2, Func2
+	eor Accu3, Func3
+	eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + selected key word)  */
+	sub V11, Accu1
+	sbc V12, Accu2
+	sbc V13, Accu3
+	sbc V14, Accu4
+	
+	movw Accu1, V01 /* swap the two halves so the next half-round updates the other word */
+	movw Accu3, V03
+	movw V01, V11
+	movw V03, V13
+	movw V11, Accu1
+	movw V13, Accu3
+	
+	/* sum -= delta (only between the two half-rounds, i.e. while T is set) */ /* delta == 0x9E3779B9 */
+	brtc 6f
+	subi Sum1, 0xB9
+	sbci Sum2, 0x79
+	sbci Sum3, 0x37
+	sbci Sum4, 0x9E
+	rjmp 1b
+	
+6:	
+	dec r0
+	breq 7f
+	rjmp 1b 
+ 
+7:
+ /* write block back to the destination (X) */
+ 	st X+, V01
+ 	st X+, V02
+ 	st X+, V03
+  	st X+, V04
+ 	st X+, V11
+ 	st X+, V12
+ 	st X+, V13
+ 	st X+, V14
+ 
+ /* epilog: restore the saved registers in reverse order */
+ 	pop r28
+ 	pop r17
+ 	pop r16
+ 	pop r15
+ 	pop r14
+ 	pop r9
+ 	pop r8
+ 	pop r7
+ 	pop r6
+ 	pop r5
+ 	pop r4
+ 	pop r3
+ 	pop r2
+ 	ret
+ 	
+ #endif
+
diff --git a/xtea/xtea.c b/xtea/xtea.c
new file mode 100644
index 0000000..4605cb0
--- /dev/null
+++ b/xtea/xtea.c
@@ -0,0 +1,53 @@
+/* xtea.c */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** 
+ * \file	xtea.c
+ * \brief	XTEA implementation 
+ *   copy'n'pasted from http://en.wikipedia.org/wiki/XTEA
+ *   and slightly modified
+ */
+ 
+#include <stdint.h> 
+ 
+
+void xtea_enc(void* dest, const void* v, const void* k) { /* encrypt the 64-bit block at v with the 128-bit key at k; the result is written to dest */
+    uint8_t i;
+    uint32_t v0=((uint32_t*)v)[0], v1=((uint32_t*)v)[1]; /* block is read as two 32-bit words; NOTE(review): the cast drops const and assumes v may be accessed as uint32_t */
+    uint32_t sum=0, delta=0x9E3779B9;
+    for(i=0; i<32; i++) { /* 32 iterations, each updating v0 and then v1 */
+        v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + ((uint32_t*)k)[sum & 3]); /* key word chosen by the low two bits of sum */
+        sum += delta;
+        v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + ((uint32_t*)k)[sum>>11 & 3]); /* key word chosen by bits 11..12 of sum */
+    }
+    ((uint32_t*)dest)[0]=v0; ((uint32_t*)dest)[1]=v1; /* store both words of the result */
+}
+
+void xtea_dec(void* dest, const void* v, const void* k) { /* decrypt the 64-bit block at v with the 128-bit key at k; the result is written to dest */
+    uint8_t i;
+    uint32_t v0=((uint32_t*)v)[0], v1=((uint32_t*)v)[1]; /* block is read as two 32-bit words; NOTE(review): the cast drops const and assumes v may be accessed as uint32_t */
+    uint32_t sum=0xC6EF3720, delta=0x9E3779B9; /* sum starts at 0xC6EF3720 and is decremented by delta every iteration */
+    for(i=0; i<32; i++) { /* 32 iterations, each updating v1 and then v0 (reverse order of xtea_enc) */
+        v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + ((uint32_t*)k)[sum>>11 & 3]); /* key word chosen by bits 11..12 of sum */
+        sum -= delta;
+        v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + ((uint32_t*)k)[sum & 3]); /* key word chosen by the low two bits of sum */
+    }
+    ((uint32_t*)dest)[0]=v0; ((uint32_t*)dest)[1]=v1; /* store both words of the result */
+}
+
+
diff --git a/xtea/xtea.h b/xtea/xtea.h
new file mode 100644
index 0000000..b753bf7
--- /dev/null
+++ b/xtea/xtea.h
@@ -0,0 +1,49 @@
+/* xtea.h */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ * Author:	Daniel Otte
+ * Date:		06.06.2006
+ * License:	GPL
+ */
+
+#ifndef XTEA_H_
+#define XTEA_H_
+
+#include <stdint.h> 
+ 
+/*
+ * These definitions apply to both implementations: xtea.c and xtea-asm.S.
+ * Sizes without the B suffix are in bits; the ...B variants are the same sizes in bytes.
+ */
+#define XTEA_BLOCKSIZE 64
+#define XTEA_BLOCKSIZEB ((XTEA_BLOCKSIZE+7)/8)
+#define XTEA_KEYSIZE 128
+#define XTEA_KEYSIZEB ((XTEA_KEYSIZE+7)/8)
+
+
+/*
+ * dest: the destination where the resulting block will be placed (64 bit)
+ * v:	 the input block to operate on (64 bit)
+ * k:	 the key for encryption/decryption (128 bit)
+ */
+void xtea_enc(void* dest, const void* v, const void* k);
+void xtea_dec(void* dest, const void* v, const void* k);
+
+
+#endif /*XTEA_H_*/