3 This file is part of the Crypto-avr-lib/microcrypt-lib.
4 Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * noekeon assembler implementation for avr
22 * email: daniel.otte@rub.de
85 .macro op32_4t op a b c d w x y z
93 .macro op32_prefix op p q a b c d w x y z
100 ; === bigendian_rotl32 ===
101 ; this function rotates a 32bit bigendian word n bits to the left
102 ; param1: the 32-bit value
103 ; given in r25,r24,r23,r22 (r22 is most significant)
104 ; param2: the 8-bit parameter giving the number of bits to rotate
106 ; return: the rotated 32-bit word
107 ; given in r25,r24,r23,r22
110 /* copy high bit of r22 to carry */
122 bigendian_rotl32_exit:
127 /******************************************************************************/
129 ; === bigendian_rotr32 ===
130 ; this function rotates a 32bit bigendian word n bits to the right
131 ; param1: the 32-bit value
132 ; given in r25,r24,r23,r22 (r22 is most significant)
133 ; param2: the 8-bit parameter giving the number of bits to rotate
135 ; return: the rotated 32-bit word
136 ; given in r25,r24,r23,r22
139 /* copy high bit of r25 to carry */
151 bigendian_rotr32_exit:
155 /******************************************************************************/
157 void theta(uint32_t* k, uint32_t* a){
159 temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
168 temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
174 round_const: .byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, \
175 0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, \
201 ; param1: the state in r2-r17
202 ; param2: pointer to k in X (r26,r27)
210 /* temp = a[0] ^ a[2]; temp ^= temp>>>8 ^ temp<<<8 */
211 op32_prefix mov, temp_, state0_, a,b,c,d, 0,1,2,3
212 op32_prefix eor, temp_, state2_, a,b,c,d, 0,1,2,3
219 op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1
221 /* temp is now a little bit mixed c,d,a,b (if abcd is normal order) */
233 /* state ^ k (X points to K) */
235 clr r29 /* Y points to r2 aka state0_0 */
244 sbiw r26, 16 /* set X back to key */
262 /* temp is now a little bit mixed c,d,a,b (if abcd is normal order) */
277 /******************************************************************************/
278 ; === noekeon_enc ===
280 ; param1: pointer to buffer/state (r24,r25)
281 ; param2: pointer to k (r22,r23)
289 clr r29 /* Y points at r2 aka state0_0 */
290 movw r30, r24 /* Z points at state */
294 push r22 /* 16 is also the number of rounds and gets pushed here */
301 push r1 /* push round constant2 (0x00) */
303 push r20 /* push round constant1 (value in r20) */
306 ldi r30, lo8(round_const+15)
307 ldi r31, hi8(round_const+15)
315 call round /* pops rc2 & rc1 */
340 /******************************************************************************/
341 /******************************************************************************/
342 ; === noekeon_dec ===
344 ; param1: pointer to buffer/state (r24,r25)
345 ; param2: pointer to k (r22,r23)
350 /* allocate 16 bytes on the stack */
351 in r30, _SFR_IO_ADDR(SPL)
352 in r31, _SFR_IO_ADDR(SPH)
354 out _SFR_IO_ADDR(SPH), r31
355 out _SFR_IO_ADDR(SPL), r30
358 /* push state pointer */
361 movw r26, r22 /* move key ptr to X */
363 /* set stackkey to zero */
369 /* copy key to state */
379 sbiw r26, 16 /* set X back to beginning of stack key */
382 /* mov state to stackkey */
390 sbiw r26, 16 /* set X back to beginning of stack key */
392 /* move data from stateptr to state */
409 push r20 /* push round constant2 (0xD4) */
410 push r22 /* push round constant1 (0x00) */
413 ldi r30, lo8(round_const-1)
414 ldi r31, hi8(round_const-1)
422 call round /* pops rc2 & rc1 */
435 /* write state back */
436 pop r31 /* pop state pointer */
447 /* remove key from stack */
448 in r30, _SFR_IO_ADDR(SPL)
449 in r31, _SFR_IO_ADDR(SPH)
451 out _SFR_IO_ADDR(SPH), r31
452 out _SFR_IO_ADDR(SPL), r30
455 /******************************************************************************/
468 ldi r30, pm_lo8(bigendian_rotl32)
469 ldi r31, pm_hi8(bigendian_rotl32)
471 /* pi1 done; now gamma */
474 xchg state0_0, state3_0
475 xchg state0_1, state3_1
476 xchg state0_2, state3_2
477 xchg state0_3, state3_3
478 /* a[2] ^= a[0] ^ a[1] ^ a[3] */
479 op32 eor, state2, state0
480 op32 eor, state2, state1
481 op32 eor, state2, state3
484 ldi r30, pm_lo8(bigendian_rotr32)
485 ldi r31, pm_hi8(bigendian_rotr32)
490 /* a[1] ^= ~(a[3]|a[2])*/
511 /* a[0] ^= a[2]&a[1] */
565 /******************************************************************************/
568 void noekeon_init(void* key, noekeon_ctx_t* ctx){
571 memset(nullv, 0, 16);
572 memcpy(ctx, key, 16);
573 noekeon_enc(ctx, nullv);
578 ; === noekeon_init ===
580 ; param1: pointer to key (r24,r25)
581 ; param2: pointer to context (r22,r23)
583 in r30, _SFR_IO_ADDR(SPL)
584 in r31, _SFR_IO_ADDR(SPH)
586 out _SFR_IO_ADDR(SPH), r31
587 out _SFR_IO_ADDR(SPL), r30
592 /* set nullv(stack) to zero */
598 /* copy key data to ctx */
611 in r30, _SFR_IO_ADDR(SPL)
612 in r31, _SFR_IO_ADDR(SPH)
614 out _SFR_IO_ADDR(SPH), r31
615 out _SFR_IO_ADDR(SPL), r30