2 * noekeon assembler implementation for avr
4 * email: daniel.otte@rub.de
67 .macro op32_4t op a b c d w x y z
75 .macro op32_prefix op p q a b c d w x y z
82 ; === bigendian_rotl32 ===
83 ; this function rotates a 32bit bigendian word n bits to the left
84 ; param1: the 32-bit value
85 ; given in r25,r24,r23,r22 (r22 is most significant)
86 ; param2: the 8-bit parameter giving the number of bits to rotate
88 ; return: the rotated 32-bit word
89 ; given in r25,r24,r23,r22
92 /* copy high bit of r22 to carry */
104 bigendian_rotl32_exit:
109 /******************************************************************************/
111 ; === bigendian_rotr32 ===
112 ; this function rotates a 32bit bigendian word n bits to the right
113 ; param1: the 32-bit value
114 ; given in r25,r24,r23,r22 (r22 is most significant)
115 ; param2: the 8-bit parameter giving the number of bits to rotate
117 ; return: the rotated 32-bit word
118 ; given in r25,r24,r23,r22
121 /* copy high bit of r25 to carry */
133 bigendian_rotr32_exit:
137 /******************************************************************************/
139 void theta(uint32_t* k, uint32_t* a){
141 temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
150 temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
156 round_const: .byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
157 0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
183 ; param1: the state in r2-r17
184 ; param2: pointer to k in X (r26,r27)
192 /* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
193 op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
194 op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
201 op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1
203 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
215 /* state ^ k (X points to K) */
217 clr r29 /* Y points to r2 aka state0_0 */
226 sbiw r26, 16 /* set X back to key */
244 /* temp is now a little bit mixed: c,d,a,b (if abcd is normal order) */
259 /******************************************************************************/
260 ; === noekeon_enc ===
262 ; param1: pointer to buffer/state (r24,r25)
263 ; param2: pointer to k (r22,r23)
271 clr r29 /* Y points at r2 aka state0_0 */
272 movw r30, r24 /* Z points at state */
276 push r22 /* 16 is also the number of rounds and gets pushed here */
283 push r1 /* push round constant2 (0x00) */
285 push r20 /* push round constant2 (0x00) -- NOTE(review): same text as the r1 push above; presumably this is round constant1 with a different value, confirm against the load of r20 */
288 ldi r30, lo8(round_const+15)
289 ldi r31, hi8(round_const+15)
297 call round /* pops rc2 & rc1 */
322 /******************************************************************************/
323 /******************************************************************************/
324 ; === noekeon_dec ===
326 ; param1: pointer to buffer/state (r24,r25)
327 ; param2: pointer to k (r22,r23)
332 /* allocate 16 bytes on the stack */
333 in r30, _SFR_IO_ADDR(SPL)
334 in r31, _SFR_IO_ADDR(SPH)
336 out _SFR_IO_ADDR(SPH), r31
337 out _SFR_IO_ADDR(SPL), r30
340 /* push state pointer */
343 movw r26, r22 /* move key ptr to X */
345 /* set stackkey to zero */
351 /* copy key to state */
361 sbiw r26, 16 /* set X back to beginning of stack key */
364 /* move state to stackkey */
372 sbiw r26, 16 /* set X back to beginning of stack key */
374 /* move data from stateptr to state */
391 push r20 /* push round constant2 (0xD4) */
392 push r22 /* push round constant1 (0x00) */
395 ldi r30, lo8(round_const-1)
396 ldi r31, hi8(round_const-1)
404 call round /* pops rc2 & rc1 */
417 /* write state back */
418 pop r31 /* pop state pointer */
429 /* remove key from stack */
430 in r30, _SFR_IO_ADDR(SPL)
431 in r31, _SFR_IO_ADDR(SPH)
433 out _SFR_IO_ADDR(SPH), r31
434 out _SFR_IO_ADDR(SPL), r30
437 /******************************************************************************/
450 ldi r30, pm_lo8(bigendian_rotl32)
451 ldi r31, pm_hi8(bigendian_rotl32)
453 /* pi1 done; now gamma */
456 xchg state0_0, state3_0
457 xchg state0_1, state3_1
458 xchg state0_2, state3_2
459 xchg state0_3, state3_3
460 /* a[2] ^= a[0] ^ a[1] ^ a[3] */
461 op32 eor, state2, state0
462 op32 eor, state2, state1
463 op32 eor, state2, state3
466 ldi r30, pm_lo8(bigendian_rotr32)
467 ldi r31, pm_hi8(bigendian_rotr32)
472 /* a[1] ^= ~(a[3]|a[2])*/
493 /* a[0] ^= a[2]&a[1] */
547 /******************************************************************************/
550 void noekeon_init(void* key, noekeon_ctx_t* ctx){
553 memset(nullv, 0, 16);
554 memcpy(ctx, key, 16);
555 noekeon_enc(ctx, nullv);
560 ; === noekeon_init ===
562 ; param1: pointer to key (r24,r25)
563 ; param2: pointer to context (r22,r23)
565 in r30, _SFR_IO_ADDR(SPL)
566 in r31, _SFR_IO_ADDR(SPH)
568 out _SFR_IO_ADDR(SPH), r31
569 out _SFR_IO_ADDR(SPL), r30
574 /* set nullv(stack) to zero */
580 /* copy key data to ctx */
593 in r30, _SFR_IO_ADDR(SPL)
594 in r31, _SFR_IO_ADDR(SPH)
596 out _SFR_IO_ADDR(SPH), r31
597 out _SFR_IO_ADDR(SPL), r30