new hash function abstraction layer + shavs + dump util + ...

[avr-crypto-lib.git] / seed-asm.S
diff --git a/seed-asm.S b/seed-asm.S

index bf9c840c515e64226c99983b673c46ef98d1f08f..84866c382098f266ed06bbb318bec5b2035d123e 100644 (file)
--- a/seed-asm.S
+++ b/seed-asm.S
@@ -1,6 +1,6 @@
  /* seed-asm.S */
  /*
  /* seed-asm.S */
  /*
-    This file is part of the Crypto-avr-lib/microcrypt-lib.
+    This file is part of the AVR-Crypto-Lib.
      Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
  
      This program is free software: you can redistribute it and/or modify
      Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
  
      This program is free software: you can redistribute it and/or modify
@@ -27,36 +27,6 @@
   */
  #include "avr-asm-macros.S"
  
   */
  #include "avr-asm-macros.S"
  
-.global bigendian_sum32
-; === bigendian_sum32 ===
-; function that adds two 32-bit words in the bigendian way and returns the result
-;  param1: the first 32-bit word
-;      given in r25,r24,r23,22 (r25 is most significant for little endian)
-;  param2: the second 32-bit word
-;      given in r21,r20,r19,18 (r21 is most significant for little endian)
-;  modifys: 
-bigendian_sum32:
-       add r25, r21
-       adc r24, r20
-       adc r23, r19
-       adc r22, r18
-       ret
-       
-.global bigendian_sub32
-; === bigendian_sub32 ===
-; function that subtracts a 32-bit words from another in the bigendian way and returns the result
-;  param1: the minuend 32-bit word
-;      given in r25,r24,r23,22 (r25 is most significant for little endian)
-;  param2: the subtrahend 32-bit word
-;      given in r21,r20,r19,18 (r21 is most significant for little endian)
-;  modifys: 
-bigendian_sub32:
-       sub r25, r21
-       sbc r24, r20
-       sbc r23, r19
-       sbc r22, r18
-       ret
-
  /******************************************************************************/       
  /*
  #define M0 0xfc
  /******************************************************************************/       
  /*
  #define M0 0xfc
@@ -112,8 +82,7 @@ T3 = X1
   *  X2 = R23
   *  X3 = R22
   */    
   *  X2 = R23
   *  X3 = R22
   */    
-.global g_function
-g_function:
+seed_g_function:
         ldi r30, lo8(seed_sbox1)
         ldi r31, hi8(seed_sbox1)
         movw r26, r30
         ldi r30, lo8(seed_sbox1)
         ldi r31, hi8(seed_sbox1)
         movw r26, r30
@@ -249,8 +218,740 @@ seed_sbox2:
  .byte    55,  231,   36,  164,  203,   83,   10,  135 
  .byte   217,   76,  131,  143,  206,   59,   74,  183 
  
  .byte    55,  231,   36,  164,  203,   83,   10,  135 
  .byte   217,   76,  131,  143,  206,   59,   74,  183 
  
+/******************************************************************************/
+
+/*
+static
+uint64_t f_function(const uint64_t* a, uint32_t k0, uint32_t k1){
+       uint32_t c,d;
+
+       c = *a & 0x00000000FFFFFFFFLL;
+       d = (*a>>32) & 0x00000000FFFFFFFFLL;
+       
+       c ^= k0; d ^= k1;
+       d ^= c;
+       d = g_function(d);
+       c = bigendian_sum32(c,d);
+       c = g_function(c);
+       d = bigendian_sum32(c,d);
+       d = g_function(d);
+       c = bigendian_sum32(c,d);       
+       return ((uint64_t)d << 32) | c;
+}
+*/
+/*
+ * seed_f_function: round function of the cipher (asm version of the C
+ * reference f_function above).
+ *
+ * param a   r24:r25  pointer to 8 input bytes; bytes 0..3 are loaded as c,
+ *                    bytes 4..7 as d
+ * param k0  r20:r23  round key word XORed into c
+ * param k1  r16:r19  round key word XORed into d
+ *
+ * return    r18..r21 = c, r22..r25 = d
+ *
+ * Every 32-bit addition treats the byte in the lowest register (lowest
+ * address) as the most significant one, so each carry chain starts at the
+ * highest register. seed_g_function is called with its argument and
+ * result in r22..r25. r10..r17 are saved and restored; Z is used as the
+ * load pointer.
+ */
+D0 = 10
+D1 = 11
+C0 = 12
+C1 = 13
+C2 = 14
+C3 = 15
+D2 = 16
+D3 = 17
+seed_f_function:
+       push_range 10, 17
+       movw r30, r24           ; Z = a
+       ld C0, Z+               ; c = bytes 0..3 of *a
+       ld C1, Z+
+       ld C2, Z+
+       ld C3, Z+
+       eor C0, r20             ; c ^= k0
+       eor C1, r21
+       eor C2, r22
+       eor C3, r23
+       ld r22, Z+              ; d = bytes 4..7 of *a (k0 is no longer needed)
+       ld r23, Z+
+       ld r24, Z+
+       ld r25, Z+
+       eor r22, r16            ; d ^= k1; r16/r17 still hold k1 here, D2/D3 are written later
+       eor r23, r17
+       eor r24, r18
+       eor r25, r19
+       eor r22, C0             ; d ^= c
+       eor r23, C1
+       eor r24, C2
+       eor r25, C3
+       rcall seed_g_function   ; d = G(d)
+       movw D0, r22            ; keep d in D0..D3 (D0/D1 and D2/D3 are even-aligned pairs)
+       movw D2, r24
+
+       add r25, C3             ; r22..r25 = c + d
+       adc r24, C2
+       adc r23, C1
+       adc r22, C0
+       rcall seed_g_function   ; c = G(c + d)
+       movw C0, r22
+       movw C2, r24
+
+       add r25, D3             ; r22..r25 = c + d
+       adc r24, D2
+       adc r23, D1
+       adc r22, D0
+       rcall seed_g_function   ; d = G(c + d); stays in r22..r25 as part of the result
+                               ; (no copy to D0..D3: they are not read again)
+
+       add C3, r25             ; c += d
+       adc C2, r24
+       adc C1, r23
+       adc C0, r22
+
+       movw r18, C0            ; result: r18..r21 = c
+       movw r20, C2
+       
+       pop_range 10, 17
+       ret
+
+/******************************************************************************/
+/*
+void seed_init(uint8_t * key, seed_ctx_t * ctx){
+       memcpy(ctx->k, key, 128/8);
+}
+
+ * param key: r24:r25
+ * param ctx: r22:r23
+ *
+ * Copies 16 bytes from key to the address given in ctx.
+ * Uses X, Z, r0 and r22 as scratch.
+*/
+
+.global seed_init
+seed_init:
+       movw r26, r24           ; X = key (source)
+       movw r30, r22           ; Z = ctx (destination)
+       ldi r22, 16             ; byte counter; r22 is free once ctx has been copied to Z
+1:
+       ld r0, X+
+       st Z+, r0
+       dec r22
+       brne 1b 
+       ret
+/******************************************************************************/
+/*
+typedef struct {
+       uint32_t k0, k1;
+} keypair_t;
+
+keypair_t getnextkeys(uint32_t *keystate, uint8_t curround){
+       keypair_t ret;
+       if (curround>15){
+               / * ERROR * /
+               ret.k0 = ret.k1 = 0;
+       } else {
+       / *     ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+               ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+               ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+               ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+               ret.k0 = seed_g_function(ret.k0);
+               ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+               ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+               ret.k1 = seed_g_function(ret.k1);
+               
+               if (curround & 1){
+                       / * odd round (1,3,5, ...) * /
+                       ((uint64_t*)keystate)[1] = bigendian_rotl8_64( ((uint64_t*)keystate)[1] );
+               } else {
+                       / * even round (0,2,4, ...) * /
+                       ((uint64_t*)keystate)[0] = bigendian_rotr8_64(((uint64_t*)keystate)[0]);
+               }
+       }
+       return ret;
+}
+*/
+/*
+ * compute_keys: derive the round key pair from the current key state.
+ * This is the arithmetic part of the C reference above, without the
+ * rotation of the key state.
+ *
+ * param keystate: r24:r25
+ * param curround: r22 (index into seed_kc, 4 bytes per entry)
+ *
+ * return: r18..r21 = k0 = G(keystate[0] + keystate[2] - kc)
+ *         r22..r25 = k1 = G(keystate[1] - keystate[3] + kc)
+ *
+ * Each 32-bit word is handled with the byte at the lowest address as the
+ * most significant one, so carry chains start at byte 3.
+ *
+ * Side effects: Y is left pointing at keystate and r10..r17 are
+ * overwritten without being saved (both callers in this file save them).
+ * r0 and Z are used as scratch. The table offset is added with
+ * "adc r31, r1", which assumes r1 == 0 (avr-gcc zero register convention).
+ */
+XRC0 = 10
+XRC1 = 11
+XRC2 = 12
+XRC3 = 13
+D0 = 14
+D1 = 15
+D2 = 16
+D3 = 17
+
+compute_keys:
+       ldi r30, lo8(seed_kc)
+       ldi r31, hi8(seed_kc)
+       lsl r22                 ; r22 = curround * 4 = byte offset into seed_kc
+       lsl r22
+       add r30, r22
+       adc r31, r1             ; propagate the carry; assumes r1 == 0
+       lpm XRC0, Z+            ; XRC0..XRC3 = round constant, XRC0 = first byte in flash
+       lpm XRC1, Z+
+       lpm XRC2, Z+
+       lpm XRC3, Z+
+       movw r28, r24           ; Y = keystate
+       ldd r25, Y+0*4+3        ; r22..r25 = keystate[0], r22 = byte at the lowest address
+       ldd r24, Y+0*4+2
+       ldd r23, Y+0*4+1
+       ldd r22, Y+0*4+0
+       
+       ldd r0, Y+2*4+3         ; += keystate[2]; ldd leaves the carry flag untouched
+       add r25, r0
+       ldd r0, Y+2*4+2
+       adc r24, r0
+       ldd r0, Y+2*4+1
+       adc r23, r0
+       ldd r0, Y+2*4+0
+       adc r22, r0
+
+       sub r25, XRC3           ; -= round constant
+       sbc r24, XRC2
+       sbc r23, XRC1
+       sbc r22, XRC0
+       rcall seed_g_function   ; k0 = G(keystate[0] + keystate[2] - kc)
+       mov D0, r22             ; park k0 in D0..D3 while k1 is computed
+       mov D1, r23
+       mov D2, r24
+       mov D3, r25
+       
  
  
+       ldd r25, Y+1*4+3        ; r22..r25 = keystate[1]
+       ldd r24, Y+1*4+2
+       ldd r23, Y+1*4+1
+       ldd r22, Y+1*4+0
  
  
+       ldd r0, Y+3*4+3         ; -= keystate[3]
+       sub r25, r0
+       ldd r0, Y+3*4+2
+       sbc r24, r0
+       ldd r0, Y+3*4+1
+       sbc r23, r0
+       ldd r0, Y+3*4+0
+       sbc r22, r0
  
  
+       add r25, XRC3           ; += round constant
+       adc r24, XRC2
+       adc r23, XRC1
+       adc r22, XRC0
+       rcall seed_g_function   ; k1 = G(keystate[1] - keystate[3] + kc), stays in r22..r25
  
  
+       mov r21, D3             ; k0 -> r18..r21
+       mov r20, D2
+       mov r19, D1
+       mov r18, D0 
+       ret
+
+seed_getnextkeys:              ; in: r24:r25 = keystate, r22 = curround; out: r18..r21 = k0, r22..r25 = k1
+       push_range 10, 17       ; compute_keys overwrites r10..r17
+       push r28                ; compute_keys loads Y with keystate
+       push r29
+;      andi r22, 0x0F
+       bst r22,0               ; T = round parity, taken before compute_keys shifts r22
+       rcall compute_keys              ; keys come from the not yet rotated state; Y = keystate on return
+       brtc even_round         ; NOTE(review): relies on T surviving seed_g_function - confirm
+odd_round:
+
+       adiw r28, 8             ; odd round: work on keystate bytes 8..15
+       ld r26, Y               ; rotate by one byte towards lower addresses; old byte 0 wraps to byte 7
+       ldd r0, Y+1
+       std Y+0, r0
+       ldd r0, Y+2
+       std Y+1, r0
+       ldd r0, Y+3
+       std Y+2, r0
+       ldd r0, Y+4
+       std Y+3, r0
+       ldd r0, Y+5
+       std Y+4, r0
+       ldd r0, Y+6
+       std Y+5, r0
+       ldd r0, Y+7
+       std Y+6, r0
+       std Y+7, r26    
+/*
+       disabled loop form of the byte rotation above:
+       movw r30, r28
+       ld r26, Z+
+       ldi r27, 7
+1:
+       ld r0, Z+
+       st Y+, r0
+       dec r27
+       brne 1b
+       st Y, r26
+*/     
+       rjmp 4f
+
+even_round:
+
+       ldd r26, Y+7            ; even round: rotate keystate bytes 0..7 by one byte towards higher addresses
+       ldd r0, Y+6             ; old byte 7 wraps to byte 0
+       std Y+7, r0
+       ldd r0, Y+5
+       std Y+6, r0
+       ldd r0, Y+4
+       std Y+5, r0
+       ldd r0, Y+3
+       std Y+4, r0
+       ldd r0, Y+2
+       std Y+3, r0
+       ldd r0, Y+1
+       std Y+2, r0
+       ldd r0, Y+0
+       std Y+1, r0
+       std Y+0, r26
+/*
+       disabled loop form of the byte rotation above:
+       adiw r28, 7     
+       ld r26, Y
+       ldi r27, 7      
+1:
+       ld r0, -Y
+       std Y+1, r0
+       dec r27
+       brne 1b
+       st Y, r26
+*/
+4:     
+       pop r29                 ; the rotation only used r0, r26 and Y, so r18..r25 still hold the keys
+       pop r28
+       pop_range 10, 17
+       ret
+
+/******************************************************************************/
+/*
+keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
+       keypair_t ret;
+       if (curround>15){
+               / * ERROR * /
+               ret.k0 = ret.k1 = 0;
+       } else {
+               if (curround & 1){
+                       / * odd round (1,3,5, ..., 15) * /
+                       ((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
+               } else {
+                       / * even round (0,2,4, ..., 14) * /
+                       ((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
+               }
+       / *     ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+               ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+               ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+               ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+               ret.k0 = seed_g_function(ret.k0);
+               ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+               ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+               ret.k1 = seed_g_function(ret.k1);
+               }
+       return ret;
+}
+*/
+/*
+ * seed_getprevkeys: step the key state one round backwards, then compute
+ * the key pair for that round (see the C reference above).
+ *
+ * param keystate: r24:r25
+ * param curround: r22
+ *
+ * return: r18..r21 = k0, r22..r25 = k1 (as left by compute_keys)
+ *
+ * The byte rotations are the inverse of those in seed_getnextkeys and are
+ * done before compute_keys is called. r10..r17 and Y are saved and
+ * restored; r0 and r26 are used as scratch.
+ */
+
+seed_getprevkeys:
+       push_range 10, 17       ; compute_keys overwrites r10..r17
+       push r28
+       push r29
+       movw r28, r24           ; Y = keystate for the rotation below
+;      andi r22, 0x0F
+       bst r22, 0              ; T = round parity
+       brts r_odd_round
+r_even_round:
+       ldd r26, Y+0            ; even round: rotate keystate bytes 0..7 by one byte towards lower addresses
+       ldd r0, Y+1             ; old byte 0 wraps to byte 7
+       std Y+0, r0
+       ldd r0, Y+2
+       std Y+1, r0
+       ldd r0, Y+3
+       std Y+2, r0
+       ldd r0, Y+4
+       std Y+3, r0
+       ldd r0, Y+5
+       std Y+4, r0
+       ldd r0, Y+6
+       std Y+5, r0
+       ldd r0, Y+7
+       std Y+6, r0
+       std Y+7, r26    
+/*
+       disabled loop form of the byte rotation above:
+       movw r30, r28
+       ld r26, Z+
+       ldi r27, 7
+1:
+       ld r0, Z+
+       st Y+, r0
+       dec r27
+       brne 1b
+       st Y, r26
+*/     
+
+       rjmp 4f
+r_odd_round:
+       ldd r26, Y+8+7          ; odd round: rotate keystate bytes 8..15 by one byte towards higher addresses
+       ldd r0, Y+8+6           ; old byte 15 wraps to byte 8
+       std Y+8+7, r0
+       ldd r0, Y+8+5
+       std Y+8+6, r0
+       ldd r0, Y+8+4
+       std Y+8+5, r0
+       ldd r0, Y+8+3
+       std Y+8+4, r0
+       ldd r0, Y+8+2
+       std Y+8+3, r0
+       ldd r0, Y+8+1
+       std Y+8+2, r0
+       ldd r0, Y+8+0
+       std Y+8+1, r0
+       std Y+8+0, r26
+/*
+       disabled loop form of the byte rotation above:
+       adiw r28, 7     
+       ld r26, Y
+       ldi r27, 7      
+1:
+       ld r0, -Y
+       std Y+1, r0
+       dec r27
+       brne 1b
+       st Y, r26
+*/
+4:
+       rcall compute_keys      ; r24:r25 and r22 are still the original arguments
+
+       pop r29
+       pop r28
+       pop_range 10, 17
+       ret
+
+/******************************************************************************/
+
+seed_kc:                       ; 16 round constants in flash, one per round, read with lpm by compute_keys
+.long   0xb979379e             ; .long is emitted low byte first, so the first byte in flash is 0x9e;
+.long   0x73f36e3c             ; compute_keys uses that first byte as the most significant one,
+.long   0xe6e6dd78             ; i.e. entry 0 acts as 0x9e3779b9 in the key schedule arithmetic
+.long   0xcccdbbf1 
+.long   0x999b77e3 
+.long   0x3337efc6 
+.long   0x676ede8d 
+.long   0xcfdcbc1b 
+.long   0x9eb97937
+.long   0x3c73f36e     
+.long   0x78e6e6dd
+.long   0xf1cccdbb
+.long   0xe3999b77
+.long   0xc63337ef
+.long   0x8d676ede
+.long   0x1bcfdcbc
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_enc(void * buffer, seed_ctx_t * ctx){
+       uint8_t r;
+       keypair_t k;
+       for(r=0; r<8; ++r){
+                       k = seed_getnextkeys(ctx->k, 2*r);
+/ *
+       DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+       DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+       DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+       DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+                       L ^= seed_f_function(&R,k.k0,k.k1);
+                       
+                       k = seed_getnextkeys(ctx->k, 2*r+1);
+/ *
+       DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+       DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+       DEBUG_S("\r\n\t DBG L:   "); uart_hexdump((uint8_t*)buffer+8, 8);
+       DEBUG_S("\r\n\t DBG R:   "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+                       R ^= seed_f_function(&L,k.k0,k.k1);
+       }
+       / * just an exchange without temp. variable * /
+       L ^= R;
+       R ^= L;
+       L ^= R;
+}
+*/
+/*
+ * seed_enc: encrypt one 16-byte block in place (see the C reference above).
+ *
+ * param buffer: r24:r25
+ * param ctx:    r22:r23
+ *
+ * Register roles (r9..r17, r28 and r29 are saved on entry and restored
+ * on exit):
+ *   CTR   (r9)       loop counter r, runs 0..7
+ *   xLPTR (r10:r11)  buffer      (L half)
+ *   xRPTR (r12:r13)  buffer + 8  (R half)
+ *   CPTR  (r14:r15)  ctx, passed on as the keystate pointer
+ *   Y                walks over the half that receives the XOR
+ */
+CTR  = 9
+xLPTR = 10
+xRPTR = 12
+CPTR = 14
+
+.global seed_enc
+seed_enc:
+       push_range 9, 17
+       push r28
+       push r29        
+       clr CTR
+       movw xLPTR, r24
+       adiw r24, 8
+       movw xRPTR, r24         
+       movw CPTR, r22
+1:
+       movw r28, xLPTR         ; Y = L
+       movw r24, CPTR
+       mov r22, CTR
+       lsl r22                 ; curround = 2*r
+       rcall seed_getnextkeys  
+
+       /* seed_getnextkeys returned k0 in r18..r21 and k1 in r22..r25;
+          seed_f_function expects k0 in r20..r23 and k1 in r16..r19 */
+       movw r16, r22           ; k1 bytes 0,1 -> r16:r17
+       movw r22, r18           ; k0 bytes 0,1 parked in r22:r23
+       movw r18, r24           ; k1 bytes 2,3 -> r18:r19
+       movw r24, r20           ; k0 bytes 2,3 parked in r24:r25
+       movw r20, r22           ; k0 bytes 0,1 -> r20:r21
+       movw r22, r24           ; k0 bytes 2,3 -> r22:r23
+       movw r24, xRPTR         ; a = R
+
+       rcall seed_f_function
+
+       ld r0, Y                ; L ^= F(R, k0, k1): XOR result bytes r18..r25 into Y[0..7]
+       eor r0, r18
+       st Y+, r0
+       ld r0, Y
+       eor r0, r19
+       st Y+, r0
+       ld r0, Y
+       eor r0, r20
+       st Y+, r0
+       ld r0, Y
+       eor r0, r21
+       st Y+, r0
+       ld r0, Y
+       eor r0, r22
+       st Y+, r0
+       ld r0, Y
+       eor r0, r23
+       st Y+, r0
+       ld r0, Y
+       eor r0, r24
+       st Y+, r0
+       ld r0, Y
+       eor r0, r25
+       st Y+, r0               ; Y has advanced by 8 and now points at R
+       /* second half */
+       movw r24, CPTR
+       mov r22, CTR
+       lsl r22
+       inc r22                 ; curround = 2*r + 1
+       rcall seed_getnextkeys  
+
+       movw r16, r22           ; same key register shuffle as above
+       movw r22, r18
+       movw r18, r24
+       movw r24, r20
+       movw r20, r22
+       movw r22, r24
+       movw r24, xLPTR         ; a = L
+       
+       rcall seed_f_function
+
+       ld r0, Y                ; R ^= F(L, k0, k1)
+       eor r0, r18
+       st Y+, r0
+       ld r0, Y
+       eor r0, r19
+       st Y+, r0
+       ld r0, Y
+       eor r0, r20
+       st Y+, r0
+       ld r0, Y
+       eor r0, r21
+       st Y+, r0
+       ld r0, Y
+       eor r0, r22
+       st Y+, r0
+       ld r0, Y
+       eor r0, r23
+       st Y+, r0
+       ld r0, Y
+       eor r0, r24
+       st Y+, r0
+       ld r0, Y
+       eor r0, r25
+       st Y+, r0
+       
+       inc CTR
+       bst CTR, 3              ; bit 3 is set once CTR reaches 8
+       brts 3f
+       rjmp 1b                 ; loop start is too far away for a conditional branch
+3:
+       movw r28, xLPTR         ; final exchange of the L and R halves
+       movw r30, xRPTR
+       ldi r17, 8
+4:
+       ld r10, Y               ; r10/r11 (xLPTR) are free to use as scratch now
+       ld r11, Z
+       st Z+, r10
+       st Y+, r11
+       dec r17
+       brne 4b
+5:
+       pop r29
+       pop r28
+       pop_range 9, 17
+       ret
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_dec(void * buffer, seed_ctx_t * ctx){
+       int8_t r;
+       keypair_t k;
+       for(r=7; r>=0; --r){
+                       k = seed_getprevkeys(ctx->k, 2*r+1);
+/ *
+       DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+       DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+       DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
+       DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+                       L ^= seed_f_function(&R,k.k0,k.k1);
+                       
+                       k = seed_getprevkeys(ctx->k, 2*r+0);
+/ *
+       DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+       DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+       DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
+       DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+                       R ^= seed_f_function(&L,k.k0,k.k1);
+       }
+       / * just an exchange without temp. variable * /
+       L ^= R;
+       R ^= L;
+       L ^= R;
+}
+*/
+/*
+ * seed_dec: decrypt one 16-byte block in place (see the C reference above).
+ *
+ * param buffer: r24:r25
+ * param ctx:    r22:r23
+ *
+ * Register roles (r9..r17, r28 and r29 are saved on entry and restored
+ * on exit):
+ *   CTR   (r9)       loop counter r, runs 7 down to 0
+ *   xLPTR (r10:r11)  buffer      (L half)
+ *   xRPTR (r12:r13)  buffer + 8  (R half)
+ *   CPTR  (r14:r15)  ctx, passed on as the keystate pointer
+ *   Y                walks over the half that receives the XOR
+ */
+CTR  = 9
+xLPTR = 10
+xRPTR = 12
+CPTR = 14
+
+.global seed_dec
+seed_dec:
+       push_range 9, 17
+       push r28
+       push r29        
+       ldi r16, 7              ; ldi cannot target r9, so load via r16
+       mov CTR, r16
+       movw xLPTR, r24
+       adiw r24, 8
+       movw xRPTR, r24         
+       movw CPTR, r22
+1:
+       movw r28, xLPTR         ; Y = L
+       movw r24, CPTR
+       mov r22, CTR
+       lsl r22
+       inc r22                 ; curround = 2*r + 1
+       rcall seed_getprevkeys  
+
+       /* seed_getprevkeys returned k0 in r18..r21 and k1 in r22..r25;
+          seed_f_function expects k0 in r20..r23 and k1 in r16..r19 */
+       movw r16, r22           ; k1 bytes 0,1 -> r16:r17
+       movw r22, r18           ; k0 bytes 0,1 parked in r22:r23
+       movw r18, r24           ; k1 bytes 2,3 -> r18:r19
+       movw r24, r20           ; k0 bytes 2,3 parked in r24:r25
+       movw r20, r22           ; k0 bytes 0,1 -> r20:r21
+       movw r22, r24           ; k0 bytes 2,3 -> r22:r23
+       movw r24, xRPTR         ; a = R
+
+       rcall seed_f_function
+
+       ld r0, Y                ; L ^= F(R, k0, k1): XOR result bytes r18..r25 into Y[0..7]
+       eor r0, r18
+       st Y+, r0
+       ld r0, Y
+       eor r0, r19
+       st Y+, r0
+       ld r0, Y
+       eor r0, r20
+       st Y+, r0
+       ld r0, Y
+       eor r0, r21
+       st Y+, r0
+       ld r0, Y
+       eor r0, r22
+       st Y+, r0
+       ld r0, Y
+       eor r0, r23
+       st Y+, r0
+       ld r0, Y
+       eor r0, r24
+       st Y+, r0
+       ld r0, Y
+       eor r0, r25
+       st Y+, r0               ; Y has advanced by 8 and now points at R
+       /* second half */
+       movw r24, CPTR
+       mov r22, CTR
+       lsl r22                 ; curround = 2*r
+       rcall seed_getprevkeys  
+
+       movw r16, r22           ; same key register shuffle as above
+       movw r22, r18
+       movw r18, r24
+       movw r24, r20
+       movw r20, r22
+       movw r22, r24
+       movw r24, xLPTR         ; a = L
+       
+       rcall seed_f_function
+
+       ld r0, Y                ; R ^= F(L, k0, k1)
+       eor r0, r18
+       st Y+, r0
+       ld r0, Y
+       eor r0, r19
+       st Y+, r0
+       ld r0, Y
+       eor r0, r20
+       st Y+, r0
+       ld r0, Y
+       eor r0, r21
+       st Y+, r0
+       ld r0, Y
+       eor r0, r22
+       st Y+, r0
+       ld r0, Y
+       eor r0, r23
+       st Y+, r0
+       ld r0, Y
+       eor r0, r24
+       st Y+, r0
+       ld r0, Y
+       eor r0, r25
+       st Y+, r0
+       
+       dec CTR
+       brmi 3f                 ; done once CTR has gone below zero
+       rjmp 1b                 ; loop start is too far away for a conditional branch
+3:
+       movw r28, xLPTR         ; final exchange of the L and R halves
+       movw r30, xRPTR
+       ldi r17, 8
+4:
+       ld r10, Y               ; r10/r11 (xLPTR) are free to use as scratch now
+       ld r11, Z
+       st Z+, r10
+       st Y+, r11
+       dec r17
+       brne 4b
+5:
+       pop r29
+       pop r28
+       pop_range 9, 17
+       ret