2 * noekeon assembler implementation for avr
4 * email: daniel.otte@rub.de
29 in r28, _SFR_IO_ADDR(SREG)
35 out _SFR_IO_ADDR(SREG), r28
71 .macro op32_4t op a b c d w x y z
79 .macro op32_prefix op p q a b c d w x y z
86 .global bigendian_rotl32
87 ; === bigendian_rotl32 ===
88 ; this function rotates a 32bit bigendian word n bits to the left
89 ; param1: the 32-bit value
90 ; given in r25,r24,r23,r22 (r22 is most significant)
91 ; param2: the 8-bit parameter giving the number of bits to rotate
93 ; return: the rotated 32-bit word
94 ; given in r25,r24,r23,r22
97 in r0, _SFR_IO_ADDR(SREG)
98 /* copy high bit of r22 to carry */
110 bigendian_rotl32_exit:
112 out _SFR_IO_ADDR(SREG), r0
116 /******************************************************************************/
118 .global bigendian_rotr32
119 ; === bigendian_rotr32 ===
120 ; this function rotates a 32bit bigendian word n bits to the right
121 ; param1: the 32-bit value
122 ; given in r25,r24,r23,r22 (r22 is most significant)
123 ; param2: the 8-bit parameter giving the number of bits to rotate
125 ; return: the rotated 32-bit word
126 ; given in r25,r24,r23,r22
129 in r0, _SFR_IO_ADDR(SREG)
130 /* copy high bit of r25 to carry */
142 bigendian_rotr32_exit:
144 out _SFR_IO_ADDR(SREG), r0
147 /******************************************************************************/
149 void theta(uint32_t* k, uint32_t* a){
151 temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
160 temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
166 round_const: .byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
167 0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
193 ; param1: the state in r2-r17
194 ; param2: pointer to k in X (r26,r27)
202 /* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
203 op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
204 op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
211 op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1
213 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
225 /* state ^ k (X points to K) */
227 clr r29 /* Y points to r2 aka state0_0 */
236 sbiw r26, 16 /* set X back to key */
254 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
269 /******************************************************************************/
270 ; === noekeon_enc ===
272 ; param1: pointer to buffer/state (r24,r25)
273 ; param2: pointer to k (r22,r23)
281 clr r29 /* Y points at r2 aka state0_0 */
282 movw r30, r24 /* Z points at state */
286 push r22 /* 16 is also the number of rounds and gets pushed here */
293 push r1 /* push round constant2 (0x00) */
295 push r20 /* push round constant1 (NOTE(review): comment previously duplicated "constant2 (0x00)"; confirm the value held in r20) */
298 ldi r30, lo8(round_const+15)
299 ldi r31, hi8(round_const+15)
307 call round /* pops rc2 & rc1 */
332 /******************************************************************************/
333 /******************************************************************************/
334 ; === noekeon_dec ===
336 ; param1: pointer to buffer/state (r24,r25)
337 ; param2: pointer to k (r22,r23)
342 /* allocate 16 bytes on the stack */
343 in r30, _SFR_IO_ADDR(SPL)
344 in r31, _SFR_IO_ADDR(SPH)
346 out _SFR_IO_ADDR(SPH), r31
347 out _SFR_IO_ADDR(SPL), r30
350 /* push state pointer */
353 movw r26, r22 /* move key ptr to X */
355 /* set stackkey to zero */
361 /* copy key to state */
371 sbiw r26, 16 /* set X back to beginning of stack key */
374 /* move state to stackkey */
382 sbiw r26, 16 /* set X back to beginning of stack key */
384 /* move data from stateptr to state */
401 push r20 /* push round constant2 (0xD4) */
402 push r22 /* push round constant1 (0x00) */
405 ldi r30, lo8(round_const-1)
406 ldi r31, hi8(round_const-1)
414 call round /* pops rc2 & rc1 */
427 /* write state back */
428 pop r31 /* pop state pointer */
439 /* remove key from stack */
440 in r30, _SFR_IO_ADDR(SPL)
441 in r31, _SFR_IO_ADDR(SPH)
443 out _SFR_IO_ADDR(SPH), r31
444 out _SFR_IO_ADDR(SPL), r30
447 /******************************************************************************/
462 /* pi1 done; now gamma */
465 xchg state0_0, state3_0
466 xchg state0_1, state3_1
467 xchg state0_2, state3_2
468 xchg state0_3, state3_3
469 /* a[2] ^= a[0] ^ a[1] ^ a[3] */
470 op32 eor, state2, state0
471 op32 eor, state2, state1
472 op32 eor, state2, state3
474 eor state2_0, state0_0
475 eor state2_1, state0_1
476 eor state2_2, state0_2
477 eor state2_3, state0_3
478 eor state2_0, state1_0
479 eor state2_1, state1_1
480 eor state2_2, state1_2
481 eor state2_3, state1_3
482 eor state2_0, state3_0
483 eor state2_1, state3_1
484 eor state2_2, state3_2
485 eor state2_3, state3_3
493 /* a[1] ^= ~(a[3]|a[2])*/
514 /* a[0] ^= a[2]&a[1] */
534 ldi r30, lo8(bigendian_rotl32)
535 ldi r31, hi8(bigendian_rotl32)
538 ldi r30, lo8(bigendian_rotr32)
539 ldi r31, hi8(bigendian_rotr32)
578 /******************************************************************************/
581 void noekeon_init(void* key, noekeon_ctx_t* ctx){
584 memset(nullv, 0, 16);
585 memcpy(ctx, key, 16);
586 noekeon_enc(ctx, nullv);
591 ; === noekeon_init ===
593 ; param1: pointer to key (r24,r25)
594 ; param2: pointer to context (r22,r23)
596 in r30, _SFR_IO_ADDR(SPL)
597 in r31, _SFR_IO_ADDR(SPH)
599 out _SFR_IO_ADDR(SPH), r31
600 out _SFR_IO_ADDR(SPL), r30
605 /* set nullv(stack) to zero */
611 /* copy key data to ctx */
624 in r30, _SFR_IO_ADDR(SPL)
625 in r31, _SFR_IO_ADDR(SPH)
627 out _SFR_IO_ADDR(SPH), r31
628 out _SFR_IO_ADDR(SPL), r30