+/**
+ * \brief initialize an ARCFOUR context from a key (key scheduling).
+ *
+ * Fills ctx->s with the identity permutation, then mixes the key into it by
+ * swapping ctx->s[x] with ctx->s[y] for every x in 0..255, where y accumulates
+ * ctx->s[x] and the key bytes. The key bytes are used cyclically.
+ *
+ * \param key      pointer to the key material
+ * \param length_b length of the key in bits (length_b/8 whole bytes are used)
+ * \param ctx      context to initialize; ctx->i and ctx->j are reset to 0
+ *
+ * \note length_B is 8 bit wide, so a 2048 bit key yields length_B == 0; the
+ *       key index then wraps naturally after 256 bytes. Keys shorter than
+ *       8 bits are not supported (the same wrap would read 256 bytes).
+ */
void arcfour_init(const void *key, uint16_t length_b, arcfour_ctx_t *ctx){
uint8_t t;
uint8_t length_B = length_b/8;
- uint16_t x,y=0;
- for(x=0; x<= 255; ++x)
+ uint8_t x=0,y=0;
+ uint8_t kidx=0; /* index of the next key byte, always x % length_B */
+ const uint8_t *kptr=key;
+ do{
ctx->s[x]=x;
+ }while(++x); /* x is 8 bit wide: it wraps to 0 after 255, ending the loop */
- for(x=0; x<= 255; ++x){
- y += ctx->s[x] + ((uint8_t*)key)[x % length_B];
+ do{
+ y += ctx->s[x] + kptr[kidx];
+ if(++kidx==length_B){ /* wrap on every pass over the key, not just once */
+ kidx = 0;
+ }
y &= 0xff;
/* ctx->s[y] <--> ctx->s[x] */
t = ctx->s[y];
ctx->s[y] = ctx->s[x];
ctx->s[x] = t;
- }
+ }while(++x);
+
ctx->i = ctx->j = 0;
}
--- /dev/null
+
+MCU_TARGET = atmega128
+F_CPU = 14745600
+OPTIMIZE = -Os # -Os
+DEBUG = -gdwarf-2
+WARNING = -pedantic -Wall -Wstrict-prototypes
+PROGRAMMER = jtagmkII
+PROG_PORT = usb
+DEFS = -D$(call uc, $(MCU_TARGET)) -DF_CPU=$(F_CPU)
+FLASHCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER) -U flash:w:# no space at the end
+#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end
+RESETCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER)
+DEP_DIR = deps/
+TEST_DIR = test/
+BIN_DIR = bin/
+TESTSRC_DIR = test_src/
+#uisp -dprog=bsd -dlpt=/dev/parport1 --upload if=$(PRG).hex
+ERASECMD =
+TESTPORT = /dev/ttyUSB0
+TESTPORTBAUDR = 115200
+TESTLOG_DIR = testlog/#
+TESTPREFIX = nessie-
+SPEEDTOOL = host/get_performance.rb
+SPEEDLOG_DIR = speed_log/
+SPEEDPREFIX =
+SPEEDCMD = performance
+SIZE_DIR = size_log/#
+LIST_DIR = listings/#
+STAT_DIR = stats/#
+AUTOASM_DIR = autoasm/#
+AUTOASM_OPT = -S
+CC = avr-gcc
+CSTD = c99
+
+override CFLAGS_A = -MMD -MF$(DEP_DIR)$(patsubst %.o,%.d,$(notdir $(1))) $(DEBUG) $(WARNING) -std=$(CSTD) $(OPTIMIZE) -mmcu=$(MCU_TARGET) $(DEFS)
+override CFLAGS = -MMD -MF$(DEP_DIR)$(patsubst %.o,%.d,$(notdir $@)) $(DEBUG) $(WARNING) -std=$(CSTD) $(OPTIMIZE) -mmcu=$(MCU_TARGET) $(DEFS)
+
+override LDFLAGS = -gdwarf-2 -Wl,-Map,
+override ASFLAGS = -mmcu=$(MCU_TARGET) -Wa,--gdwarf-2
+
+SIZESTAT_FILE = sizestats.txt
+
+OBJCOPY = avr-objcopy
+OBJDUMP = avr-objdump
+SIZE = avr-size
+READELF = readelf
+RUBY = ruby
+GET_TEST = host/get_test.rb
+MAKE = make
+MAKE2GRAPH = ~/bin/make2graph.rb
+TWOPI = twopi
MCU_TARGET = atmega644
+F_CPU = 20000000
OPTIMIZE = -Os # -Os
DEBUG = -gdwarf-2
WARNING = -pedantic -Wall -Wstrict-prototypes
PROGRAMMER = avr911
-DEFS = -D$(call uc, $(MCU_TARGET))
-FLASHCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER) -U flash:w:# no space at the end
+PROG_PORT = /dev/ttyUSB0
+DEFS = -D$(call uc, $(MCU_TARGET)) -DF_CPU=$(F_CPU)
+FLASHCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER) -U flash:w:# no space at the end
#FLASHCMD = avrdude -p $(MCU_TARGET) -c usbasp -U flash:w:# no space at the end
-RESETCMD = avrdude -p $(MCU_TARGET) -P /dev/ttyUSB0 -c $(PROGRAMMER)
+RESETCMD = avrdude -p $(MCU_TARGET) -P $(PROG_PORT) -c $(PROGRAMMER)
DEP_DIR = deps/
TEST_DIR = test/
BIN_DIR = bin/
const bcdesc_t xtea_desc PROGMEM = {
BCDESC_TYPE_BLOCKCIPHER,
- BC_INIT_TYPE_2,
+ BC_INIT_TYPE_1,
xtea_str,
16,
64,
STREAM_CIPHERS += $(ALGO_NAME)
$(ALGO_NAME)_OBJ := A5_1.o
-$(ALGO_NAME)_TEST_BIN := main-a5_1-test.o nessie_stream_test.o nessie_common.o $(CLI_STD)
+$(ALGO_NAME)_DIR := a51/
+$(ALGO_NAME)_INCDIR := memxor/ scal/
+$(ALGO_NAME)_TEST_BIN := main-a5_1-test.o $(CLI_STD) $(SCAL_STD)
$(ALGO_NAME)_NESSIE_TEST := "nessie"
$(ALGO_NAME)_PERFORMANCE_TEST := "performance"
.macro CLEAR_BIT_IO io:req bit:req reg:req
.if _SFR_IO_REG_P(\io)
- cbi _SFR_IO_ADDR(\io), bit
+ cbi _SFR_IO_ADDR(\io), \bit
.else
lds \reg, _SFR_MEM_ADDR(\io)
andi \reg, ~_BV(\bit)
.macro SET_BIT_IO io:req bit:req reg:req
.if _SFR_IO_REG_P(\io)
- sbi _SFR_IO_ADDR(\io),bit
+ sbi _SFR_IO_ADDR(\io), \bit
.else
lds \reg, _SFR_MEM_ADDR(\io)
ori \reg, _BV(\bit)
#ifndef __CONFIG_H__
#define __CONFIG_H__
#include <avr/io.h>
-#define F_CPU 20000000
+//#define F_CPU 20000000
// #define F_CPU 16000000 /* oscillator-frequency in Hz */
// #define F_CPU 14745600
* param addr: r20:r23
* param length: r18
*/
+#ifdef EEWE
+# define EEPE EEWE
+#endif
+
.global ee_read_block
ee_read_block:
movw r26, r24
bcal_performance_multiple(algolist);
}
+/**
+ * \brief XTEA round-trip demonstration on the command line interface.
+ *
+ * Encrypts an all-zero 8 byte block in place with the 16 byte key
+ * 80 00 ... 00, decrypts it again and prints key, plaintext, ciphertext and
+ * the decrypted block as hex dumps.
+ */
+void test_xtea(void){
+	uint8_t block[8];
+	uint8_t key[16];
+
+	memset(block, 0, sizeof(block));
+	memset(key, 0, sizeof(key));
+	key[0] = 0x80;
+
+	cli_putstr_P(PSTR("\r\n*** XTEA test ***\r\n key: "));
+	cli_hexdump(key, sizeof(key));
+
+	cli_putstr_P(PSTR("\r\n plain: "));
+	cli_hexdump(block, sizeof(block));
+
+	/* source and destination are the same buffer */
+	xtea_enc(block, block, key);
+	cli_putstr_P(PSTR("\r\n crypt: "));
+	cli_hexdump(block, sizeof(block));
+
+	xtea_dec(block, block, key);
+	cli_putstr_P(PSTR("\r\n plain: "));
+	cli_hexdump(block, sizeof(block));
+}
+
/*****************************************************************************
* main *
*****************************************************************************/
cmdlist_entry_t cmdlist[] PROGMEM = {
{ nessie_str, NULL, testrun_nessie_xtea},
- { test_str, NULL, testrun_nessie_xtea},
+ { test_str, NULL, test_xtea},
{ performance_str, NULL, testrun_performance_xtea},
{ echo_str, (void*)1, (void_fpt)echo_ctrl},
{ NULL, NULL, NULL}
/* uart_defs.h */
/*
- This file is part of the AVR-uart_ni.
+ This file is part of the AVR-uart_i.
Copyright (C) 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-
+/**
+ * \file uart_defs.h
+ * \email daniel.otte@rub.de
+ * \author Daniel Otte
+ * \date 2009-07-24
+ * \license GPLv3 or later
+ * \addtogroup uart_config
+ * \brief definitions for uart configuration
+ * \details
+ * This file declares some macros for use in uart configuration
+ */
+/*@{*/
#ifndef UART_DEFS_H_
#define UART_DEFS_H_
-#define UART_PARATY_NONE 0 /** \def UART_PARATY_NONE define no paraty */
-#define UART_PARATY_EVEN 2 /** \def UART_PARATY_EVEN define even paraty */
-#define UART_PARATY_ODD 3 /** \def UART_PARATY_ODD define odd paraty */
-#define UART_STOPBITS_1 0 /** \def UART_STOPBITS_1 define 1 stop bit */
-#define UART_STOPBITS_2 1 /** \def UART_STOPBITS_2 define 2 stop bits */
-#define UART_DATABITS_5 0 /** \def UART_DATABITS_5 define 5 data bits */
-#define UART_DATABITS_6 1 /** \def UART_DATABITS_6 define 6 data bits */
-#define UART_DATABITS_7 2 /** \def UART_DATABITS_7 define 7 data bits */
-#define UART_DATABITS_8 3 /** \def UART_DATABITS_8 define 8 data bits */
-#define UART_DATABITS_9 7 /** \def UART_DATABITS_9 define 9 data bits */
-
+#define UART_PARATY_NONE 0 /**< define no paraty */
+#define UART_PARATY_EVEN 2 /**< define even paraty */
+#define UART_PARATY_ODD 3 /**< define odd paraty */
+#define UART_STOPBITS_1 0 /**< define 1 stop bit */
+#define UART_STOPBITS_2 1 /**< define 2 stop bits */
+#define UART_DATABITS_5 0 /**< define 5 data bits */
+#define UART_DATABITS_6 1 /**< define 6 data bits */
+#define UART_DATABITS_7 2 /**< define 7 data bits */
+#define UART_DATABITS_8 3 /**< define 8 data bits */
+#define UART_DATABITS_9 7 /**< define 9 data bits */
+/*@}*/
#endif /* UART_DEFS_H_ */
/**
* \file uart_i.h
* \email daniel.otte@rub.de
- * \author Daniel Otte
+ * \author Daniel Otte
* \date 2009-07-24
* \license GPLv3 or later
- * \ingroup uart_i
- * \brief declaration for non-interrupt uart
+ * \defgroup uart_i
+ * \brief declaration for interrupt based uart
+ * \details
+ * This implementation of the uart-interface of AVR microcontrollers uses the
+ * interrupt architecture and can be used to handle serial communication in the
+ * background.
+ * The uart is configured at compile-time by some special defines starting with
+ * \a UART0_ for configuring the first uart and \a UART1_ for the second.
+ * Some settings use symbolic values defined in uart_defs.h .
+ * The following options are available:
+ * - \a *_I enables the interrupt based driver for this uart
+ * - \a 0 disables driver
+ * - \a 1 enables driver
+ * - \a *_BAUD_RATE sets the baudrate for this uart (value is the baudrate)
+ * - \a *_STOPBITS sets the amount of stop bits for this uart
+ * - \a UART_STOPBITS_1 for one stop bit
+ * - \a UART_STOPBITS_2 for two stop bits
+ * - \a *_DATABITS sets the amount of data bits for this uart
+ * - \a UART_DATABITS_5 for five data bits
+ * - \a UART_DATABITS_6 for six data bits
+ * - \a UART_DATABITS_7 for seven data bits
+ * - \a UART_DATABITS_8 for eight data bits
+ * - \a *_PARATY sets the mode for parity calculation for this uart
+ * - \a UART_PARATY_NONE ignore parity
+ * - \a UART_PARATY_ODD odd parity
+ * - \a UART_PARATY_EVEN even parity
+ * - \a *_RXBUFFER_SIZE size of the receive buffer in bytes
+ * - \a *_TXBUFFER_SIZE size of the transmit buffer in bytes
+ * - \a *_SWFLOWCTRL enable/disable software flow control (via XON & XOFF)
+ * - \a 0 disable software flow control
+ * - \a 1 enable software flow control
+ * - \a *_THRESH_HIGH set upper limit for the rx buffer, which causes an XOFF
+ * to be sent when crossed (only relevant if software flow
+ * control is enabled)
+ * - \a *_THRESH_LOW set lower limit for the rx buffer, which causes an XON to
+ * be sent when crossed and an XOFF has been sent previously
+ * (only relevant if software flow control is enabled)
+ * - \a *_HOOK enable/disable implementation of the hook feature
+ * (\ref uart0_sethook())
+ * - \a 0 disable hook feature
+ * - \a 1 enable hook feature
+ *
*/
+/*@{*/
#ifndef UART_I_H_
#define UART_I_H_
#include "circularbytebuffer.h"
#include <stdint.h>
+/**
+ * \brief storage type for uart0 context
+ *
+ * This type is used to store uart0 specific global variables.
+ * It contains the buffer instances and when necessary
+ * a pointer to the hook function and an indicator if the hook is
+ * currently executed.
+ * If software flow control is enabled it also contains flags for flow control.
+ */
typedef struct{
- circularbytebuffer_t rxb;
- circularbytebuffer_t txb;
+ circularbytebuffer_t rxb; /**< recieve buffer */
+ circularbytebuffer_t txb; /**< transmitt buffer*/
#if UART0_HOOK
- void(*hook)(uint8_t);
- volatile uint8_t hook_running;
+ void(*hook)(uint8_t); /**< pointer to the hook function */
+ volatile uint8_t hook_running; /**< flag indicating if the hook is running */
#endif
#if UART0_SWFLOWCTRL
- volatile uint8_t txon;
- volatile uint8_t rxon;
+ volatile uint8_t txon; /**< flag indicating if we are allowed to send data */
+ volatile uint8_t rxon; /**< flag indicating if we have send an \a XOFF */
#endif
} uart0_ctx_t;
+/**
+ * \brief storage type for uart1 context
+ *
+ * This type is used to store uart1 specific global variables.
+ * It contains the buffer instances and when necessary
+ * a pointer to the hook function and an indicator if the hook is
+ * currently executed.
+ * If software flow control is enabled it also contains flags for flow control.
+ */
typedef struct{
- circularbytebuffer_t rxb;
- circularbytebuffer_t txb;
+ circularbytebuffer_t rxb; /**< recieve buffer */
+ circularbytebuffer_t txb; /**< transmitt buffer */
#if UART1_HOOK
- void(*hook)(uint8_t);
- volatile uint8_t hook_running;
+ void(*hook)(uint8_t); /**< pointer to the hook function */
+ volatile uint8_t hook_running; /**< flag indicating if the hook is running */
#endif
#if UART1_SWFLOWCTRL
- volatile uint8_t txon;
- volatile uint8_t rxon;
+ volatile uint8_t txon; /**< flag indicating if we are allowed to send data */
+ volatile uint8_t rxon; /**< flag indicating if we have send an \a XOFF */
#endif
} uart1_ctx_t;
#if UART0_I
-/** \fn uart0_init(void)
+/**
* \brief initialize uart0.
- * This function initializes the first uart according to the parameter specifyed
+ *
+ * This function initializes the first uart according to the parameter specified
* in config.h .
*/
void uart0_init(void);
-/** \fn uart0_putc(uint16_t)
+/**
* \brief send data through uart0.
- * This function sends data through the first uart
+ *
+ * This function sends data through the first uart
* (the data size is debfined in config.h).
* \param c data to send
*/
void uart0_putc(uint16_t c);
-/** \fn uart0_getc(void)
+/**
* \brief read data from uart0.
- * This function reads data from the first uart
+ *
+ * This function reads data from the first uart
* (the data size is debfined in config.h).
* \return data recived by uart0
*/
uint16_t uart0_getc(void);
-/** \fn uart0_dataavail(void)
+/**
* \brief checks if data is available.
- *
+ *
* This function checks the state of the input buffer of uart0 and
* returns if data is available or not.
* \return zero if no data is available else a value different from zero is returned
uint8_t uart0_dataavail(void);
#if UART0_HOOK
+/**
+ * \brief sets the hook for uart0.
+ *
+ * This function modifies the way the software handles incoming data.
+ * When the hook is set to \a NULL (which is the default) incoming data is buffered
+ * in a special ringbuffer and read by \ref uart0_getc(). If the hook is set to a
+ * different value, this value is interpreted as a function pointer. The hook (the
+ * function the function pointer points to) is called with the received data
+ * as single parameter. Any value returned by the hook is discarded.
+ *
+ * \note If the hook is set \ref uart0_getc() will not return, as the
+ * ringbuffer is bypassed.
+ * \param fpt pointer to the handler function for received data
+ */
void uart0_sethook(void(*fpt)(uint8_t));
#endif
#endif /* UART0_I */
#if UART1_I
-/** \fn uart1_init(void)
+/**
* \brief initialize uart1.
+ *
* This function initializes the second uart according to the parameter specifyed
* in config.h .
*/
void uart1_init(void);
-/** \fn uart1_putc(uint16_t)
+/**
* \brief send data through uart1.
- * This function sends data through the second uart
+ *
+ * This function sends data through the second uart
* (the data size is debfined in config.h).
* \param c data to send
*/
void uart1_putc(uint16_t c);
-/** \fn uart1_getc(void)
+/**
* \brief read data from uart1.
- * This function reads data from the second uart
+ *
+ * This function reads data from the second uart
* (the data size is debfined in config.h).
* \return data recived by uart1
*/
uint16_t uart1_getc(void);
-/** \fn uart1_dataavail(void)
+/**
* \brief checks if data is available.
+ *
* This function checks the state of the input buffer of uart1 and
* returns if data is available or not.
* \return zero if no data is available else a value different from zero is returned
*/
uint8_t uart1_dataavail(void);
-void uart0_sethook(void(*fpt)(uint8_t));
+/**
+ * \brief sets the hook for uart1.
+ *
+ * This function modifies the way the software handles incoming data.
+ * When the hook is set to \a NULL (which is the default) incoming data is buffered
+ * in a special ringbuffer and read by \ref uart1_getc(). If the hook is set to a
+ * different value, this value is interpreted as a function pointer. The hook (the
+ * function the function pointer points to) is called with the received data
+ * as single parameter. Any value returned by the hook is discarded.
+ *
+ * \note If the hook is set \ref uart1_getc() will not return, as the
+ * ringbuffer is bypassed.
+ * \param fpt pointer to the handler function for received data
+ */
+void uart1_sethook(void(*fpt)(uint8_t));
#endif
+/*@}*/
+
#endif /* UART_I_H_ */
-/* xtea-asm.S */
+/* xtea-enc.S */
/*
- This file is part of the AVR-Crypto-Lib.
- Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
+ This file is part of the AVR-Crypto-Lib.
+ Copyright (C) 2006-2011 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* xtea-asm.S
- * Author: Daniel Otte
- * Date: 2006-06-06
- * License: GPLv3 or later
- * Implementation of XTEA for AVR
- * include xtea.h in your C-Project to use this functions.
-*/
-V01 = 2
-V02 = 3
-V03 = 4
-V04 = 5
-V11 = 6
-V12 = 7
-V13 = 8
-V14 = 9
-Accu1 = 14
-Accu2 = 15
-Accu3 = 16
-Accu4 = 17
-Sum1 = 18
-Sum2 = 19
-Sum3 = 20
-Sum4 = 21
-Func1 = 22
-Func2 = 23
-Func3 = 24
-Func4 = 25
-C = 28 /* der kleine Zaehler fuer zwischendurch */
+#include "avr-asm-macros.S"
+
+B0 = 4
+B1 = 5
+B2 = 6
+B3 = 7
+
+A0 = 8
+A1 = 9
+A2 = 10
+A3 = 11
+
+V10 = 12
+V11 = 13
+V12 = 14
+V13 = 15
+
+V00 = 16
+V01 = 17
+V02 = 18
+V03 = 19
+
+S0 = 20
+S1 = 21
+S2 = 22
+S3 = 23
+
+xchg_V0V1: /* swap the 32-bit values V0 (V00..V03) and V1 (V10..V13); r26:r27 is used as scratch */
+ movw r26, V10 /* save the low word of V1 */
+ movw V10, V00 /* low word: V1 = V0 */
+ movw V00, r26 /* low word: V0 = saved V1 */
+ movw r26, V12 /* same exchange for the high words */
+ movw V12, V02
+ movw V02, r26
+ ret
+
+eor_AB: /* A ^= B, bytewise over the four bytes A0..A3 / B0..B3; B is left unchanged */
+ eor A0, B0
+ eor A1, B1
+ eor A2, B2
+ eor A3, B3
+ ret
+
+g_func: /* A = ((V1 << 4) ^ (V1 >> 5)) + V1 ; overwrites B and r24, V1 is unchanged */
+ movw A0, V10 /* A = V1 */
+ movw A2, V12
+ movw B0, V10 /* B = V1 */
+ movw B2, V12
+
+ ldi r24, 4 /* shift count for A */
+10:
+ lsl A0 /* A <<= 1 across all four bytes, carry rippling upwards */
+ rol A1
+ rol A2
+ rol A3
+ dec r24
+ brne 10b
+
+ ldi r24, 5 /* shift count for B */
+10:
+ lsr B3 /* B >>= 1 (logical), starting at the most significant byte */
+ ror B2
+ ror B1
+ ror B0
+ dec r24
+ brne 10b
+
+ rcall eor_AB /* A = (V1 << 4) ^ (V1 >> 5) */
+
+ add A0, V10 /* A += V1 */
+ adc A1, V11
+ adc A2, V12
+ adc A3, V13
+
+ ret
+
+sum_plus_k: /* V0 +/-= A ^ (S + key word); in: r24 = byte offset candidate, Z = key, T flag selects add (set) or subtract (clear) */
+ andi r24, (3<<2) /* keep bits 2..3: byte offset 0, 4, 8 or 12 into the key */
+ movw r26, r30 /* X = Z (key base), Z itself stays untouched */
+ add r26, r24
+ adc r27, r1 /* carry into the high byte; presumably r1 is the avr-gcc zero register - confirm */
+ ld B0, X+ /* B = 32-bit key word at the selected offset */
+ ld B1, X+
+ ld B2, X+
+ ld B3, X+
+ add B0, S0 /* B += S (the running sum) */
+ adc B1, S1
+ adc B2, S2
+ adc B3, S3
+ rcall eor_AB /* A ^= B */
+ brtc 20f /* T clear: take the subtracting path */
+ add V00, A0 /* T set: V0 += A */
+ adc V01, A1
+ adc V02, A2
+ adc V03, A3
+ ret
+20: sub V00, A0 /* T clear: V0 -= A */
+ sbc V01, A1
+ sbc V02, A2
+ sbc V03, A3
+ ret
+
+main1: /* V0 +/-= (((V1 << 4) ^ (V1 >> 5)) + V1) ^ (S + key[S & 3]); direction given by the T flag */
+ rcall g_func /* A = ((V1 << 4) ^ (V1 >> 5)) + V1 */
+ mov r24, S0
+ lsl r24 /* move bits 0..1 of S into bits 2..3 ... */
+ lsl r24 /* ... so sum_plus_k's mask yields (S & 3) * 4 */
+ rcall sum_plus_k
+ ret
+
+main2: /* V1 +/-= (((V0 << 4) ^ (V0 >> 5)) + V0) ^ (S + key[(S >> 11) & 3]); direction given by the T flag */
+ rcall xchg_V0V1 /* swap roles so the shared helpers operate on the other half */
+ rcall g_func
+ mov r24, S1 /* S1 holds bits 8..15 of S */
+ lsr r24 /* bits 11..12 of S land in bits 2..3, giving ((S >> 11) & 3) * 4 after masking */
+ rcall sum_plus_k
+ rcall xchg_V0V1 /* restore the original V0/V1 assignment */
+ ret
.global xtea_enc
-; == xtea_enc ==
-; xtea encrytion function
-; param1: 16-bit pointer to destination for encrypted block
-; given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to encrypt
-; given in r23,r22
-; param3: 16-bit pointer to the key (128-bit)
-; given in r21,r20
-;
xtea_enc:
- /* prolog */
- push r2
- push r3
- push r4
- push r5
- push r6
- push r7
- push r8
- push r9
- push r14
- push r15
- push r16
- push r17
- push r28
-
- /* load the block */
- movw r26, r22 /* X points to block */
- movw r30, r20 /* Z points to key */
- ld V01, X+
- ld V02, X+
- ld V03, X+
- ld V04, X+
- ld V11, X+
- ld V12, X+
- ld V13, X+
- ld V14, X+
-; push r25
-; push r24
- movw r26, r24 /* X points to destination */
-
- ldi Func1, 32
- mov r0, Func1 /* r0 is cycle-counter */
- clr Sum1
- clr Sum2
- movw Sum3, Sum1
- clt
-
-1:
- movw Accu1, V11
- movw Accu3, V13
- ldi C, 4
-2: lsl Accu1
- rol Accu2
- rol Accu3
- rol Accu4
- dec C
- brne 2b /* Accu == V1 << 4 */
-
- movw Func1, V11
- movw Func3, V13
- ldi C, 5
-3: lsr Func4
- ror Func3
- ror Func2
- ror Func1
- dec C
- brne 3b /* Func == V1 >> 5 */
-
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4
- add Accu1, V11
- adc Accu2, V12
- adc Accu3, V13
- adc Accu4, V14 /* Accu == ( (V1<<4)^(V1>>5) ) + V1 */
-
- brtc 4f
- mov C, Sum2
- lsr C
- andi C,(0x03 <<2)
- clt
- rjmp 5f
-4:
- mov C, Sum1 /* calc key offset */
- andi C, 0x03
- lsl C
- lsl C
set
-
-5:
- add r30, C
- adc r31, r1
- ld Func1, Z
- ldd Func2, Z+1
- ldd Func3, Z+2
- ldd Func4, Z+3 /* Func = key[sum & 3] */
- sub r30, C
- sbci r31, 0
- add Func1, Sum1
- adc Func2, Sum2
- adc Func3, Sum3
- adc Func4, Sum4
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4 /* Accu = ((V1<<4 ^ V1>>5) + V1) ^ (sum + key[sum&3]) */
- add Accu1, V01
- adc Accu2, V02
- adc Accu3, V03
- adc Accu4, V04
-
- movw V01, V11
- movw V03, V13
- movw V11, Accu1
- movw V13, Accu3
-
- /* sum += delta */ /* delta == 0x9E3779B9 */
- brtc 6f
- ldi C, 0xB9
- add Sum1, C
- ldi C, 0x79
- adc Sum2, C
- ldi C, 0x37
- adc Sum3, C
- ldi C, 0x9E
- adc Sum4, C
- rjmp 1b
-
-6:
+xtea_intro:
+ clr r27
+ ldi r26, 4
+ ldi r30, 14
+10:
+ ld r0, X+
+ push r0
+ dec r30
+ brne 10b
+
+ push r24
+ push r25
+ movw r30, r20
+/* load block */
+ movw r26, r22
+ ld V00, X+
+ ld V01, X+
+ ld V02, X+
+ ld V03, X+
+ ld V10, X+
+ ld V11, X+
+ ld V12, X+
+ ld V13, X+
+ ldi r24, 32
+ mov r0, r24
+ brtc xtea_dec_start
+ clr S0
+ clr S1
+ movw S2, S0
+
+10:
+ rcall main1
+ subi S0, 0x47
+ sbci S1, 0x86
+ sbci S2, 0xC8
+ sbci S3, 0x61
+ rcall main2
+
dec r0
- breq 7f
- rjmp 1b
-
- 7:
- /* write block back */
- ; pop r26
- ; pop r27
- st X+, V01
- st X+, V02
- st X+, V03
- st X+, V04
- st X+, V11
- st X+, V12
- st X+, V13
- st X+, V14
-
- /* epilog */
- pop r28
- pop r17
- pop r16
- pop r15
- pop r14
- pop r9
- pop r8
- pop r7
- pop r6
- pop r5
- pop r4
- pop r3
- pop r2
- ret
-
-;####################################################################
-
- /* #endif TWO_IN_ONE */
-
- /* #ifdef TWO_IN_ONE */
- /* now we use the same base-structure for enc- and decryption
- to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
- this is ok, since even the larges atmel today has "only" 8k of ram,
- but you shouldn't use this feature while using external ram.
- */
-.global xtea_enc
- ori r21, 0x80
-
-.global xtea_dec
-; == xtea_dec ==
-; xtea decrytion function
-; param1: 16-bit pointer to destination for decrypted block
-; given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to derypt
-; given in r23,r22
-; param3: 16-bit pointer to the key (128-bit)
-; given in r21,r20
-;
-/*
-void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
- uint32_t v0=v[0], v1=v[1], i;
- uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
- for(i=0; i<32; i++) {
- v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
- sum -= delta;
- v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
- }
- dest[0]=v0; dest[1]=v1;
-}
-*/
+ brne 10b
+
+/* store back */
+xtea_enc_exit:
+ pop r27
+ pop r26
+ st X+, V00
+ st X+, V01
+ st X+, V02
+ st X+, V03
+ st X+, V10
+ st X+, V11
+ st X+, V12
+ st X+, V13
+
+ clr r27
+ ldi r26, 18
+ ldi r24, 14
+10:
+ pop r0
+ st -X, r0
+ dec r24
+ brne 10b
+ ret
+
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+.global xtea_dec
xtea_dec:
- /* prolog */
- push r2
- push r3
- push r4
- push r5
- push r6
- push r7
- push r8
- push r9
- push r14
- push r15
- push r16
- push r17
- push r28
- /* load the block */
- movw r26, r22 /* Z points to block */
- movw r30, r20 /* X points to key */
- ld V01, X+
- ld V02, X+
- ld V03, X+
- ld V04, X+
- ld V11, X+
- ld V12, X+
- ld V13, X+
- ld V14, X+
- movw r26, r24 /* Z points to destination */
-
- ldi Sum1, 32
- mov r0, Sum1 /* r1 is cycle-counter */
- ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
- ldi Sum2, 0x37
- ldi Sum3, 0xEF
- ldi Sum4, 0xC6
clt
+ rjmp xtea_intro
+xtea_dec_start:
+ ldi S0, 0x20 /* sum = 0xC6EF3720 */
+ ldi S1, 0x37
+ ldi S2, 0xEF
+ ldi S3, 0xC6
+
+10:
+ rcall main2
+ subi S0, 0xB9
+ sbci S1, 0x79
+ sbci S2, 0x37
+ sbci S3, 0x9E
+ rcall main1
-1:
- movw Accu1, V01
- movw Accu3, V03
- ldi C, 4
-2: lsl Accu1
- rol Accu2
- rol Accu3
- rol Accu4
- dec C
- brne 2b /* Accu == V0 << 4 */
-
- movw Func1, V01
- movw Func3, V03
- ldi C, 5
-3: lsr Func4
- ror Func3
- ror Func2
- ror Func1
- dec C
- brne 3b /* Func == V0 >> 5 */
-
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4
- add Accu1, V01
- adc Accu2, V02
- adc Accu3, V03
- adc Accu4, V04 /* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
-
- brts 4f
- mov C, Sum2
- lsr C
- andi C,(0x03 <<2)
- set
- rjmp 5f
-4:
- mov C, Sum1 /* calc key offset */
- andi C, 0x03
- lsl C
- lsl C
- clt
-
-5:
- add r30, C
- adc r31, r1
- ld Func1, Z
- ldd Func2, Z+1
- ldd Func3, Z+2
- ldd Func4, Z+3 /* Func = key[sum & 3] */
- sub r30, C
- sbci r31, 0
- add Func1, Sum1
- adc Func2, Sum2
- adc Func3, Sum3
- adc Func4, Sum4
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3]) */
- sub V11, Accu1
- sbc V12, Accu2
- sbc V13, Accu3
- sbc V14, Accu4
-
- movw Accu1, V01
- movw Accu3, V03
- movw V01, V11
- movw V03, V13
- movw V11, Accu1
- movw V13, Accu3
-
- /* sum += delta */ /* delta == 0x9E3779B9 */
- brtc 6f
- subi Sum1, 0xB9
- sbci Sum2, 0x79
- sbci Sum3, 0x37
- sbci Sum4, 0x9E
- rjmp 1b
-
-6:
dec r0
- breq 7f
- rjmp 1b
-
-7:
- /* write block back */
- st X+, V01
- st X+, V02
- st X+, V03
- st X+, V04
- st X+, V11
- st X+, V12
- st X+, V13
- st X+, V14
-
- /* epilog */
- pop r28
- pop r17
- pop r16
- pop r15
- pop r14
- pop r9
- pop r8
- pop r7
- pop r6
- pop r5
- pop r4
- pop r3
- pop r2
- ret
-
- /* #endif */
-
-;####################################################################
-
- #ifdef TWO_IN_ONE
- /* now we use the same base-structure for enc- and decryption
- to indicate operation mode we use the highest bit of param3 (16 bit pointer to key),
- this is ok, since even the larges atmel today has "only" 8k of ram,
- but you shouldn't use this feature while using external ram.
- */
-.global xtea_enc
- ori r21, 0x80
-
-.global xtea_dec
-; == xtea_dec ==
-; xtea decrytion function
-; param1: 16-bit pointer to destination for decrypted block
-; given in r25,r24
-; param2: 16-bit pointer to the block (64-bit) which is to derypt
-; given in r23,r22
-; param3: 16-bit pointer to the key (128-bit)
-; given in r21,r20
-;
-/*
-void xtea_dec(uint32_t* dest, uint32_t* v, uint32_t* k) {
- uint32_t v0=v[0], v1=v[1], i;
- uint32_t sum=0xC6EF3720, delta=0x9E3779B9;
- for(i=0; i<32; i++) {
- v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
- sum -= delta;
- v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
- }
- dest[0]=v0; dest[1]=v1;
-}
-*/
+ brne 10b
+/* store back */
+ rjmp xtea_enc_exit
-xtea_dec:
- /* prolog */
- push r2
- push r3
- push r4
- push r5
- push r6
- push r7
- push r8
- push r9
- push r14
- push r15
- push r16
- push r17
- push r28
- /* set T-bit if we are going to encrypt, clear otherwise */
- bst r21, 7
- andi r21, 0x7f /* fix r21:r22 to a real addr */
- /* load the block */
- movw r26, r22 /* Z points to block */
- movw r30, r20 /* X points to key */
- ld V01, X+
- ld V02, X+
- ld V03, X+
- ld V04, X+
- ld V11, X+
- ld V12, X+
- ld V13, X+
- ld V14, X+
- movw r26, r24 /* Z points to destination */
-
- ldi Sum1, 32
- mov r0, Sum1 /* r1 is cycle-counter */
- ldi Sum1, 0x20 /* sum = 0xC6EF3720 */
- ldi Sum2, 0x37
- ldi Sum3, 0xEF
- ldi Sum4, 0xC6
- clt
-1:
- movw Accu1, V01
- movw Accu3, V03
- ldi C, 4
-2: lsl Accu1
- rol Accu2
- rol Accu3
- rol Accu4
- dec C
- brne 2b /* Accu == V0 << 4 */
-
- movw Func1, V01
- movw Func3, V03
- ldi C, 5
-3: lsr Func4
- ror Func3
- ror Func2
- ror Func1
- dec C
- brne 3b /* Func == V0 >> 5 */
-
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4
- add Accu1, V01
- adc Accu2, V02
- adc Accu3, V03
- adc Accu4, V04 /* Accu == ( (V0<<4)^(V0>>5) ) + V0 */
-
- brts 4f
- mov C, Sum2
- lsr C
- andi C,(0x03 <<2)
- set
- rjmp 5f
-4:
- mov C, Sum1 /* calc key offset */
- andi C, 0x03
- lsl C
- lsl C
- clt
-
-5:
- add r30, C
- adc r31, r1
- ld Func1, Z
- ldd Func2, Z+1
- ldd Func3, Z+2
- ldd Func4, Z+3 /* Func = key[sum & 3] */
- sub r30, C
- sbci r31, 0
- add Func1, Sum1
- adc Func2, Sum2
- adc Func3, Sum3
- adc Func4, Sum4
- eor Accu1, Func1
- eor Accu2, Func2
- eor Accu3, Func3
- eor Accu4, Func4 /* Accu = ((V0<<4 ^ V0>>5) + V0) ^ (sum + key[sum&3]) */
- sub V11, Accu1
- sbc V12, Accu2
- sbc V13, Accu3
- sbc V14, Accu4
-
- movw Accu1, V01
- movw Accu3, V03
- movw V01, V11
- movw V03, V13
- movw V11, Accu1
- movw V13, Accu3
-
- /* sum += delta */ /* delta == 0x9E3779B9 */
- brtc 6f
- subi Sum1, 0xB9
- sbci Sum2, 0x79
- sbci Sum3, 0x37
- sbci Sum4, 0x9E
- rjmp 1b
-
-6:
- dec r0
- breq 7f
- rjmp 1b
-
-7:
- /* write block back */
- st X+, V01
- st X+, V02
- st X+, V03
- st X+, V04
- st X+, V11
- st X+, V12
- st X+, V13
- st X+, V14
-
- /* epilog */
- pop r28
- pop r17
- pop r16
- pop r15
- pop r14
- pop r9
- pop r8
- pop r7
- pop r6
- pop r5
- pop r4
- pop r3
- pop r2
- ret
-
- #endif
*/
#include <stdint.h>
-
void xtea_enc(void* dest, const void* v, const void* k) {
uint8_t i;