2 * noekeon assembler implementation for avr
4 * email: daniel.otte@rub.de
29 in r28, _SFR_IO_ADDR(SREG)
35 out _SFR_IO_ADDR(SREG), r28
71 .macro op32_4t op a b c d w x y z
79 .macro op32_prefix op p q a b c d w x y z
86 ; === bigendian_rotl32 ===
87 ; this function rotates a 32bit bigendian word n bits to the left
88 ; param1: the 32-bit value
89 ; given in r25,r24,r23,r22 (r22 is most significant)
90 ; param2: the 8-bit parameter giving the number of bits to rotate
92 ; return: the rotated 32-bit word
93 ; given in r25,r24,r23,r22
96 /* copy high bit of r22 to carry */
108 bigendian_rotl32_exit:
113 /******************************************************************************/
115 ; === bigendian_rotr32 ===
116 ; this function rotates a 32bit bigendian word n bits to the right
117 ; param1: the 32-bit value
118 ; given in r25,r24,r23,r22 (r22 is most significant)
119 ; param2: the 8-bit parameter giving the number of bits to rotate
121 ; return: the rotated 32-bit word
122 ; given in r25,r24,r23,r22
125 /* copy low bit of r25 to carry */
137 bigendian_rotr32_exit:
141 /******************************************************************************/
143 void theta(uint32_t* k, uint32_t* a){
145 temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
154 temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
160 round_const: .byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
161 0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
187 ; param1: the state in r2-r17
188 ; param2: pointer to k in X (r26,r27)
196 /* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
197 op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
198 op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
205 op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1
207 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
219 /* state ^ k (X points to K) */
221 clr r29 /* Y points to r2 aka state0_0 */
230 sbiw r26, 16 /* set X back to key */
248 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
263 /******************************************************************************/
264 ; === noekeon_enc ===
266 ; param1: pointer to buffer/state (r24,r25)
267 ; param2: pointer to k (r22,r23)
275 clr r29 /* Y points at r2 aka state0_0 */
276 movw r30, r24 /* Z points at state */
280 push r22 /* 16 is also the number of rounds and gets pushed here */
287 push r1 /* push round constan2 (0x00) */
289 push r20 /* push round constan2 (0x00) */
292 ldi r30, lo8(round_const+15)
293 ldi r31, hi8(round_const+15)
301 call round /* pops rc2 & rc1 */
326 /******************************************************************************/
327 /******************************************************************************/
328 ; === noekeon_dec ===
330 ; param1: pointer to buffer/state (r24,r25)
331 ; param2: pointer to k (r22,r23)
336 /* allocate 16 bytes on the stack */
337 in r30, _SFR_IO_ADDR(SPL)
338 in r31, _SFR_IO_ADDR(SPH)
340 out _SFR_IO_ADDR(SPH), r31
341 out _SFR_IO_ADDR(SPL), r30
344 /* push state pointer */
347 movw r26, r22 /* move key ptr to X */
349 /* set stackkey to zero */
355 /* copy key to state */
365 sbiw r26, 16 /* set X back to begining of stack key */
368 /* mov state to stackkey */
376 sbiw r26, 16 /* set X back to begining of stack key */
378 /* move data from stateptr to state */
395 push r20 /* push round constant2 (0xD4) */
396 push r22 /* push round constan1 (0x00) */
399 ldi r30, lo8(round_const-1)
400 ldi r31, hi8(round_const-1)
408 call round /* pops rc2 & rc1 */
421 /* write state back */
422 pop r31 /* pop state pointer */
433 /* remove key from stack */
434 in r30, _SFR_IO_ADDR(SPL)
435 in r31, _SFR_IO_ADDR(SPH)
437 out _SFR_IO_ADDR(SPH), r31
438 out _SFR_IO_ADDR(SPL), r30
441 /******************************************************************************/
454 ldi r30, lo8(bigendian_rotl32)
455 ldi r31, hi8(bigendian_rotl32)
457 /* pi1 done; now gamma */
460 xchg state0_0, state3_0
461 xchg state0_1, state3_1
462 xchg state0_2, state3_2
463 xchg state0_3, state3_3
464 /* a[2] ^= a[0] ^ a[1] ^ a[3] */
465 op32 eor, state2, state0
466 op32 eor, state2, state1
467 op32 eor, state2, state3
469 eor state2_0, state0_0
470 eor state2_1, state0_1
471 eor state2_2, state0_2
472 eor state2_3, state0_3
473 eor state2_0, state1_0
474 eor state2_1, state1_1
475 eor state2_2, state1_2
476 eor state2_3, state1_3
477 eor state2_0, state3_0
478 eor state2_1, state3_1
479 eor state2_2, state3_2
480 eor state2_3, state3_3
483 ldi r30, lo8(bigendian_rotr32)
484 ldi r31, hi8(bigendian_rotr32)
489 /* a[1] ^= ~(a[3]|a[2])*/
510 /* a[0] ^= a[2]&a[1] */
566 /******************************************************************************/
569 void noekeon_init(void* key, noekeon_ctx_t* ctx){
572 memset(nullv, 0, 16);
573 memcpy(ctx, key, 16);
574 noekeon_enc(ctx, nullv);
579 ; === noekeon_init ===
581 ; param1: pointer to key (r24,r25)
582 ; param2: pointer to context (r22,r23)
584 in r30, _SFR_IO_ADDR(SPL)
585 in r31, _SFR_IO_ADDR(SPH)
587 out _SFR_IO_ADDR(SPH), r31
588 out _SFR_IO_ADDR(SPL), r30
593 /* set nullv(stack) to zero */
599 /* copy key data to ctx */
612 in r30, _SFR_IO_ADDR(SPL)
613 in r31, _SFR_IO_ADDR(SPH)
615 out _SFR_IO_ADDR(SPH), r31
616 out _SFR_IO_ADDR(SPL), r30