+ mov r21, D3
+ mov r20, D2
+ mov r19, D1
+ mov r18, D0
+ ret
+
+/*
+ * seed_getnextkeys - forward key schedule step used by seed_enc.
+ *
+ * The key pair is produced by compute_keys first; afterwards one 8-byte half
+ * of the key state is rotated by one byte so that the state is ready for the
+ * following round.
+ *
+ *   in:  r24:r25 = keystate pointer, r22 = curround (this is how seed_enc
+ *        loads them before the call)
+ *   out: whatever compute_keys leaves behind; seed_enc picks the pair up
+ *        from r18..r25 directly after the call
+ *
+ * Scratch registers touched here: r0, r26. Y is pushed and popped.
+ * push_range/pop_range are macros from outside this chunk (presumably they
+ * push/pop r10..r17).
+ * NOTE(review): compute_keys is not visible here. This routine never loads Y
+ * itself and evaluates T only after the call, so it presumably relies on
+ * compute_keys setting Y = keystate and leaving the T flag alone - confirm.
+ */
+seed_getnextkeys:
+ push_range 10, 17
+ push r28
+ push r29
+ bst r22, 0 /* T = bit 0 of curround, set for odd rounds */
+ rcall compute_keys
+ brtc even_round
+odd_round:
+ /* bytes 8..15: each byte moves one address down, old byte 8 wraps to 15 */
+ adiw r28, 8
+ ld r26, Y
+.irp i, 0, 1, 2, 3, 4, 5, 6
+ ldd r0, Y+\i+1
+ std Y+\i, r0
+.endr
+ std Y+7, r26
+ rjmp 4f
+even_round:
+ /* bytes 0..7: each byte moves one address up, old byte 7 wraps to 0 */
+ ldd r26, Y+7
+.irp i, 6, 5, 4, 3, 2, 1, 0
+ ldd r0, Y+\i
+ std Y+\i+1, r0
+.endr
+ std Y+0, r26
+4:
+ pop r29
+ pop r28
+ pop_range 10, 17
+ ret
+
+/******************************************************************************/
+/*
+keypair_t getprevkeys(uint32_t *keystate, uint8_t curround){
+ keypair_t ret;
+ if (curround>15){
+ / * ERROR * /
+ ret.k0 = ret.k1 = 0;
+ } else {
+ if (curround & 1){
+ / * odd round (1,3,5, ..., 15) * /
+ ((uint64_t*)keystate)[1] = bigendian_rotr8_64( ((uint64_t*)keystate)[1] );
+ } else {
+ / * even round (0,2,4, ..., 14) * /
+ ((uint64_t*)keystate)[0] = bigendian_rotl8_64(((uint64_t*)keystate)[0]);
+ }
+ / * ret.k0 = seed_g_function(keystate[0] + keystate[2] - pgm_read_dword(&(seed_kc[curround])));
+ ret.k1 = seed_g_function(keystate[1] - keystate[3] + pgm_read_dword(&(seed_kc[curround]))); * /
+ ret.k0 = bigendian_sum32(keystate[0], keystate[2]);
+ ret.k0 = bigendian_sub32(ret.k0, pgm_read_dword(&(seed_kc[curround])));
+ ret.k0 = seed_g_function(ret.k0);
+ ret.k1 = bigendian_sub32(keystate[1], keystate[3]);
+ ret.k1 = bigendian_sum32(ret.k1, pgm_read_dword(&(seed_kc[curround])));
+ ret.k1 = seed_g_function(ret.k1);
+ }
+ return ret;
+}
+*/
+/*
+ * param keystate: r24:r25
+ * param curround: r22
+ */
+
+/*
+ * seed_getprevkeys - backward key schedule step used by seed_dec.
+ *
+ * Mirror image of the forward step: one 8-byte half of the key state is
+ * rotated by one byte first, then compute_keys derives the key pair from the
+ * rotated state.
+ *
+ *   in:  r24:r25 = keystate pointer, r22 = curround
+ *   out: whatever compute_keys leaves behind; seed_dec picks the pair up
+ *        from r18..r25 directly after the call
+ *
+ * Scratch registers touched here: r0, r26. Y is pushed and popped.
+ * push_range/pop_range and compute_keys live outside this chunk.
+ * NOTE(review): r24:r25 and r22 are passed on to compute_keys untouched,
+ * which presumably expects exactly these two parameters - confirm.
+ */
+seed_getprevkeys:
+ push_range 10, 17
+ push r28
+ push r29
+ movw r28, r24 /* Y = keystate */
+ bst r22, 0 /* T = bit 0 of curround, set for odd rounds */
+ brts r_odd_round
+r_even_round:
+ /* bytes 0..7: each byte moves one address down, old byte 0 wraps to 7 */
+ ldd r26, Y+0
+.irp i, 0, 1, 2, 3, 4, 5, 6
+ ldd r0, Y+\i+1
+ std Y+\i, r0
+.endr
+ std Y+7, r26
+ rjmp 4f
+r_odd_round:
+ /* bytes 8..15: each byte moves one address up, old byte 15 wraps to 8 */
+ ldd r26, Y+8+7
+.irp i, 6, 5, 4, 3, 2, 1, 0
+ ldd r0, Y+8+\i
+ std Y+8+\i+1, r0
+.endr
+ std Y+8+0, r26
+4:
+ rcall compute_keys
+
+ pop r29
+ pop r28
+ pop_range 10, 17
+ ret
+
+/******************************************************************************/
+
+/*
+ * seed_kc - sixteen 32-bit round constants, one per round, indexed by
+ * curround (the C reference in this file reads them with
+ * pgm_read_dword(&(seed_kc[curround]))).
+ * NOTE(review): the words look byte-swapped with respect to the constants
+ * printed in the SEED specification, which would match the bigendian_*
+ * helpers named in the C reference - confirm against compute_keys.
+ */
+seed_kc:
+.long 0xb979379e, 0x73f36e3c, 0xe6e6dd78, 0xcccdbbf1 /* rounds  0.. 3 */
+.long 0x999b77e3, 0x3337efc6, 0x676ede8d, 0xcfdcbc1b /* rounds  4.. 7 */
+.long 0x9eb97937, 0x3c73f36e, 0x78e6e6dd, 0xf1cccdbb /* rounds  8..11 */
+.long 0xe3999b77, 0xc63337ef, 0x8d676ede, 0x1bcfdcbc /* rounds 12..15 */
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_enc(void * buffer, seed_ctx_t * ctx){
+ uint8_t r;
+ keypair_t k;
+ for(r=0; r<8; ++r){
+ k = seed_getnextkeys(ctx->k, 2*r);
+/ *
+ DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+ DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+ DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
+ DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+ L ^= seed_f_function(&R,k.k0,k.k1);
+
+ k = seed_getnextkeys(ctx->k, 2*r+1);
+/ *
+ DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+ DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+ DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
+ DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+ R ^= seed_f_function(&L,k.k0,k.k1);
+ }
+ / * just an exchange without temp. variable * /
+ L ^= R;
+ R ^= L;
+ L ^= R;
+}
+*/
+/*
+ * param buffer: r24:r25
+ * param ctx: r22:r23
+ */
+CTR = 9 /* r9: loop counter, one count per pair of rounds */
+xLPTR = 10 /* r10:r11: address of the first 8 bytes of the buffer (L) */
+xRPTR = 12 /* r12:r13: address of the last 8 bytes of the buffer (R) */
+CPTR = 14 /* r14:r15: ctx pointer, handed to seed_getnextkeys as key state */
+
+/*
+ * seed_enc - encrypt one 16-byte block in place.
+ *
+ *   in: r24:r25 = buffer, r22:r23 = ctx
+ *
+ * Eight loop passes are made. Each pass fetches the keys for round 2*CTR and
+ * xors the 8 result bytes of seed_f_function(R, keys) into L, then fetches the
+ * keys for round 2*CTR+1 and xors seed_f_function(L, keys) into R. Finally the
+ * two halves of the buffer are exchanged.
+ *
+ * seed_f_function and the push_range/pop_range macros are defined outside
+ * this chunk. The code below reads the 8 result bytes of seed_f_function
+ * from r18..r25 and expects r9..r15 and Y to survive both calls
+ * (NOTE(review): confirm against seed_f_function).
+ */
+.global seed_enc
+seed_enc:
+ push_range 9, 17
+ push r28
+ push r29
+ clr CTR
+ movw xLPTR, r24
+ adiw r24, 8
+ movw xRPTR, r24
+ movw CPTR, r22
+1:
+ /* ---- first half of the pass: L ^= f(R, keys of round 2*CTR) ---- */
+ movw r28, xLPTR /* Y walks over L while the result is xored in */
+ movw r24, CPTR
+ mov r22, CTR
+ lsl r22 /* r22 = 2*CTR */
+ rcall seed_getnextkeys
+
+ /* Move the key pair out of r18..r25 into the argument registers: */
+ /* afterwards r16..r19 = old r22..r25 and r20..r23 = old r18..r21. */
+ movw r16, r22 /* r16:r17 <- r22:r23 */
+ movw r22, r18 /* park old r18:r19 in r22:r23 */
+ movw r18, r24 /* r18:r19 <- r24:r25 */
+ movw r24, r20 /* park old r20:r21 in r24:r25 */
+ movw r20, r22 /* r20:r21 <- old r18:r19 */
+ movw r22, r24 /* r22:r23 <- old r20:r21 */
+ movw r24, xRPTR /* pointer argument: R */
+
+ rcall seed_f_function
+
+ /* xor r18..r25 into the 8 bytes at Y; Y ends up pointing at R */
+.irp reg, 18, 19, 20, 21, 22, 23, 24, 25
+ ld r0, Y
+ eor r0, r\reg
+ st Y+, r0
+.endr
+ /* ---- second half of the pass: R ^= f(L, keys of round 2*CTR+1) ---- */
+ movw r24, CPTR
+ mov r22, CTR
+ lsl r22
+ inc r22 /* r22 = 2*CTR + 1 */
+ rcall seed_getnextkeys
+
+ /* same register shuffle as in the first half */
+ movw r16, r22
+ movw r22, r18
+ movw r18, r24
+ movw r24, r20
+ movw r20, r22
+ movw r22, r24
+ movw r24, xLPTR /* pointer argument: L */
+
+ rcall seed_f_function
+
+.irp reg, 18, 19, 20, 21, 22, 23, 24, 25
+ ld r0, Y
+ eor r0, r\reg
+ st Y+, r0
+.endr
+
+ inc CTR
+ bst CTR, 3 /* bit 3 becomes set once CTR reaches 8 */
+ brts 3f
+ rjmp 1b
+3:
+ /* exchange L and R byte by byte */
+ movw r28, xLPTR
+ movw r30, xRPTR
+ ldi r17, 8
+4:
+ /* r10/r11 are the xLPTR registers; Y already holds that pointer, */
+ /* so they are free to serve as temporaries here */
+ ld r10, Y
+ ld r11, Z
+ st Z+, r10
+ st Y+, r11
+ dec r17
+ brne 4b
+5:
+ pop r29
+ pop r28
+ pop_range 9, 17
+ ret
+
+/******************************************************************************/
+/*
+#define L (((uint64_t*)buffer)[0])
+#define R (((uint64_t*)buffer)[1])
+
+void seed_dec(void * buffer, seed_ctx_t * ctx){
+ int8_t r;
+ keypair_t k;
+ for(r=7; r>=0; --r){
+ k = seed_getprevkeys(ctx->k, 2*r+1);
+/ *
+ DEBUG_S("\r\n\tDBG ka,0: "); uart_hexdump(&k.k0, 4);
+ DEBUG_S("\r\n\tDBG ka,1: "); uart_hexdump(&k.k1, 4);
+ DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+0, 8);
+ DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+8, 8);
+* /
+ L ^= seed_f_function(&R,k.k0,k.k1);
+
+ k = seed_getprevkeys(ctx->k, 2*r+0);
+/ *
+ DEBUG_S("\r\n\tDBG kb,0: "); uart_hexdump(&k.k0, 4);
+ DEBUG_S("\r\n\tDBG kb,1: "); uart_hexdump(&k.k1, 4);
+ DEBUG_S("\r\n\t DBG L: "); uart_hexdump((uint8_t*)buffer+8, 8);
+ DEBUG_S("\r\n\t DBG R: "); uart_hexdump((uint8_t*)buffer+0, 8);
+* /
+ R ^= seed_f_function(&L,k.k0,k.k1);
+ }
+ / * just an exchange without temp. variable * /
+ L ^= R;
+ R ^= L;
+ L ^= R;
+}
+*/
+/*
+ * param buffer: r24:r25
+ * param ctx: r22:r23
+ */
+CTR = 9 /* r9: loop counter, one count per pair of rounds */
+xLPTR = 10 /* r10:r11: address of the first 8 bytes of the buffer (L) */
+xRPTR = 12 /* r12:r13: address of the last 8 bytes of the buffer (R) */
+CPTR = 14 /* r14:r15: ctx pointer, handed to seed_getprevkeys as key state */
+
+/*
+ * seed_dec - decrypt one 16-byte block in place.
+ *
+ *   in: r24:r25 = buffer, r22:r23 = ctx
+ *
+ * CTR runs from 7 down to 0. Each pass fetches the keys for round 2*CTR+1
+ * and xors the 8 result bytes of seed_f_function(R, keys) into L, then
+ * fetches the keys for round 2*CTR and xors seed_f_function(L, keys) into R.
+ * Finally the two halves of the buffer are exchanged.
+ *
+ * seed_f_function and the push_range/pop_range macros are defined outside
+ * this chunk. The code below reads the 8 result bytes of seed_f_function
+ * from r18..r25 and expects r9..r15 and Y to survive both calls
+ * (NOTE(review): confirm against seed_f_function).
+ */
+.global seed_dec
+seed_dec:
+ push_range 9, 17
+ push r28
+ push r29
+ ldi r16, 7 /* ldi only reaches r16..r31, so go through r16 */
+ mov CTR, r16
+ movw xLPTR, r24
+ adiw r24, 8
+ movw xRPTR, r24
+ movw CPTR, r22
+1:
+ /* ---- first half of the pass: L ^= f(R, keys of round 2*CTR+1) ---- */
+ movw r28, xLPTR /* Y walks over L while the result is xored in */
+ movw r24, CPTR
+ mov r22, CTR
+ lsl r22
+ inc r22 /* r22 = 2*CTR + 1 */
+ rcall seed_getprevkeys
+
+ /* Move the key pair out of r18..r25 into the argument registers: */
+ /* afterwards r16..r19 = old r22..r25 and r20..r23 = old r18..r21. */
+ movw r16, r22 /* r16:r17 <- r22:r23 */
+ movw r22, r18 /* park old r18:r19 in r22:r23 */
+ movw r18, r24 /* r18:r19 <- r24:r25 */
+ movw r24, r20 /* park old r20:r21 in r24:r25 */
+ movw r20, r22 /* r20:r21 <- old r18:r19 */
+ movw r22, r24 /* r22:r23 <- old r20:r21 */
+ movw r24, xRPTR /* pointer argument: R */
+
+ rcall seed_f_function
+
+ /* xor r18..r25 into the 8 bytes at Y; Y ends up pointing at R */
+.irp reg, 18, 19, 20, 21, 22, 23, 24, 25
+ ld r0, Y
+ eor r0, r\reg
+ st Y+, r0
+.endr
+ /* ---- second half of the pass: R ^= f(L, keys of round 2*CTR) ---- */
+ movw r24, CPTR
+ mov r22, CTR
+ lsl r22 /* r22 = 2*CTR */
+ rcall seed_getprevkeys
+
+ /* same register shuffle as in the first half */
+ movw r16, r22
+ movw r22, r18
+ movw r18, r24
+ movw r24, r20
+ movw r20, r22
+ movw r22, r24
+ movw r24, xLPTR /* pointer argument: L */
+
+ rcall seed_f_function
+
+.irp reg, 18, 19, 20, 21, 22, 23, 24, 25
+ ld r0, Y
+ eor r0, r\reg
+ st Y+, r0
+.endr
+
+ dec CTR
+ brmi 3f /* leave once CTR has dropped below zero */
+ rjmp 1b
+3:
+ /* exchange L and R byte by byte */
+ movw r28, xLPTR
+ movw r30, xRPTR
+ ldi r17, 8
+4:
+ /* r10/r11 are the xLPTR registers; Y already holds that pointer, */
+ /* so they are free to serve as temporaries here */
+ ld r10, Y
+ ld r11, Z
+ st Z+, r10
+ st Y+, r11
+ dec r17
+ brne 4b
+5:
+ pop r29
+ pop r28
+ pop_range 9, 17
+ ret