3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2012 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * \email daniel.otte@rub.de
24 * \license GPLv3 or later
29 #include "avr-asm-macros.S"
; NOTE(review): 8 * 5 * 5 = 200 bytes = the keccak 5x5 state of 64-bit lanes;
; the member declared here presumably starts right after the state inside the
; context struct -- confirm against the full .struct definition (not visible
; in this chunk).
43 .struct ctx_a + 8 * 5 * 5
54 .global rho_pi_idx_table
; NOTE(review): this .byte line appears to be the body of an .irp/.macro
; expansion (uses \i and \j, whose loops are outside this view). It emits the
; byte offset of the rho/pi destination lane inside a uint64_t[5][5]:
; (row * 5 + col) * 8 with row = (2*\j + 3*\i) % 5, col = \i -- TODO confirm
; the \i/\j roles against the full table generator.
58 .byte (((2 * \j + 3 * \i) % 5) * 5 + \i) * 8
; Encode a residual bit-rotate amount a (0..7) into a small code:
;   a <= 4: rotate LEFT by a bits      -> code = a << 1            (bit 0 clear)
;   a >  4: rotate RIGHT by 8-a bits   -> code = 1 | ((8-a) << 1)  (bit 0 set)
; so at most 4 single-bit rotate steps are ever needed, and bit 0 of the
; code selects the rotate direction.
63 #define ROT_BIT(a) (( (a) <= 4) ? ((a) << 1) : (0x01 | ((8 - (a)) << 1)))
; Full 64-bit rotate code for amount a: high nibble = whole-byte rotate count
; (incremented by one when the residual a%8 > 4, because the bit rotate then
; goes the short way RIGHT from the next byte boundary); low nibble =
; ROT_BIT(a % 8).
64 #define ROT_CODE(a) ((((a) / 8 + ((((a) % 8) > 4) ? 1 : 0)) << 4) | ROT_BIT(((a) % 8)))
66 const uint8_t keccak_rotate_codes[5][5] PROGMEM = {
67 { ROT_CODE( 0), ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) },
68 { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) },
69 { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) },
70 { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) },
71 { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) }
; Precomputed ROT_CODE(r) bytes for the keccak rho rotation offsets
;   {  0,  1, 62, 28, 27 }
;   { 36, 44,  6, 55, 20 }
;   {  3, 10, 43, 25, 39 }
;   { 41, 45, 15, 21,  8 }
;   { 18,  2, 61, 56, 14 }
; e.g. 0x85 = ROT_CODE(62): byte-rotate 8 (== 0 mod 8) then bit-rotate
; right 2; 0x38 = ROT_CODE(28): byte-rotate 3, bit-rotate left 4.
76 .byte 0x00, 0x02, 0x85, 0x38, 0x36
77 .byte 0x48, 0x58, 0x15, 0x73, 0x28
78 .byte 0x06, 0x14, 0x56, 0x32, 0x53
79 .byte 0x52, 0x67, 0x23, 0x37, 0x10
80 .byte 0x24, 0x04, 0x87, 0x70, 0x25
; NOTE(review): 6 rows x 4 bytes = 24 bytes, i.e. one byte per keccak-f[1600]
; round -- presumably the keccak_rc_comp table (compressed iota round
; constants, expanded at runtime by the code that loads keccak_rc_comp
; further down). The table label is outside this view; confirm in full source.
83 .byte 0x01, 0x92, 0xda, 0x70
84 .byte 0x9b, 0x21, 0xf1, 0x59
85 .byte 0x8a, 0x88, 0x39, 0x2a
86 .byte 0xbb, 0xcb, 0xd9, 0x53
87 .byte 0x52, 0xc0, 0x1a, 0x6a
88 .byte 0xf1, 0xd0, 0x21, 0x78
; Tail of byte_rot_jmp_table (label and the rotate-by-0 entry are outside
; this view): relative-jump dispatch table indexed by the whole-byte rotate
; count 1..7 taken from the high nibble of a ROT_CODE byte.
192 rjmp rotate64_1byte_left
193 rjmp rotate64_2byte_left
194 rjmp rotate64_3byte_left
195 rjmp rotate64_4byte_left
196 rjmp rotate64_5byte_left
197 rjmp rotate64_6byte_left
198 rjmp rotate64_7byte_left
202 void keccak_theta (uint64_t *a, uint64_t *b){
204 for(i = 0; i < 5; ++i){
205 b[i][0] = a[0][i] ^ a[1][i] ^ a[2][i] ^ a[3][i] ^ a[4][i];
210 /*********************************************
212 *********************************************
214 r24:r25 = a ; uint64_t a[5][5]
223 Z = r24:r25 + 7 + 4 * 40
230 .irp r, 0, 1, 2, 3, 4
243 /*********************************************
245 *********************************************
247 r24:r25 = a+1 ; uint64_t a[5][5]
250 a[0..4][0] ^= rol(b,1)
257 Z = r24:r25 + 7 + 4 * 40
270 .irp r, 0, 1, 2, 3, 4
281 add r19, __zero_reg__ ; NOTE(review): adding zero is a no-op for the value and clears carry -- if this line is meant to wrap the carry of a preceding lsl/rol chain (rotate-left-by-1) back into the low byte, it should be adc, not add; confirm against the full source
284 .irp r, 0, 1, 2, 3, 4
296 ; a[i][j] = b[i][j] ^ ((~(b[i][(j + 1) % 5])) & (b[i][(j + 2) % 5]));
298 /*********************************************
300 *********************************************
306 a[0..7] ^= ~b[0..7] & c[0..7]
327 .global keccak_nextBlock
328 .func keccak_nextBlock
; Z += ctx_bs: subi/sbci with the negated lo8/hi8 halves is the standard AVR
; 16-bit add-immediate idiom. The low-byte subi produces a borrow that the
; sbci must consume on the HIGH byte of the pair.
331 subi ZL, lo8(-ctx_bs)
; BUGFIX: was "sbci ZL, hi8(-ctx_bs)" -- applying the high-byte adjustment to
; ZL corrupts the low byte and leaves ZH unadjusted. Every other 16-bit
; pointer adjust in this file uses the lo/hi register pair (subi XL/sbci XH,
; subi YL/sbci YH).
332 sbci ZH, hi8(-ctx_bs)
350 stack_alloc_large 200, r26, r27
355 movw r30, r24 ; Z = a
384 for(i = 0; i < 5; ++i){
385 for(j = 0; j < 5; ++j){
386 a[j][i] ^= b[(4 + i) % 5][0];
391 /* a[0..4][0]{0..7} ^= b[4][0]{0..7} */
395 /* a[0..4][1]{0..7} ^= b[0][0]{0..7} */
396 subi XL, lo8(4 * 5 * 8 + 8)
397 sbci XH, hi8(4 * 5 * 8 + 8)
399 /* a[0..4][2]{0..7} ^= b[1][0]{0..7} */
402 /* a[0..4][3]{0..7} ^= b[2][0]{0..7} */
405 /* a[0..4][4]{0..7} ^= b[3][0]{0..7} */
409 for(i = 0; i < 5; ++i){
410 for(j = 0; j < 5; ++j){
411 a[j][i] ^= rotate64_1bit_left(b[(i + 1) % 5][0]);
415 /* a[0..4][0]{0..7} ^= rol(b[1][0]{0..7}) */
; NOTE(review): r24:r25 appears to hold the a-pointer -- step it back by
; 5*8 - 1 = 39 bytes (one row of lanes minus one byte) -- and X the b-pointer,
; rewound by 2*5*8 + 8 = 88 bytes; both are subi-lo / sbci-hi 16-bit
; subtract-immediate pairs. Confirm the pointer roles against the full
; function (its head is outside this view).
416 subi r24, lo8(5 * 8 - 1)
417 sbci r25, hi8(5 * 8 - 1)
418 subi XL, lo8(2 * 5 * 8 + 8)
419 sbci XH, hi8(2 * 5 * 8 + 8)
421 /* a[0..4][1]{0..7} ^= rol(b[2][0]{0..7}) */
424 /* a[0..4][2]{0..7} ^= rol(b[3][0]{0..7}) */
427 /* a[0..4][3]{0..7} ^= rol(b[4][0]{0..7}) */
430 /* a[0..4][4]{0..7} ^= rol(b[0][0]{0..7}) */
431 subi XL, lo8(4 * 5 * 8 + 8)
432 sbci XH, hi8(4 * 5 * 8 + 8)
438 for(i = 0; i < 5; ++i){
439 for(j = 0; j < 5; ++j){
440 b[(2 * i + 3 * j) % 5][j] =
441 rotate64left_code(a[j][i], pgm_read_byte(&(keccak_rotate_codes[i][j])));
447 const uint8_t* rot_code = (const uint8_t*)keccak_rotate_codes;
448 const uint8_t* idx_idx = (const uint8_t*)rho_pi_idx_table;
449 uint64_t *a_tmp = (uint64_t*)a;
450 for(i = 0; i < 25; ++i){
451 *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) =
452 rotate64left_code(*a_tmp++, pgm_read_byte(rot_code++));
; Load the flash addresses of the two rho/pi lookup tables into r18:r19
; (the two pairs come from different spots -- an intervening line is outside
; this view -- so presumably each address is consumed before the next load):
; keccak_rotate_codes = per-lane ROT_CODE bytes, rho_pi_idx_table =
; destination-lane byte offsets. Both are PROGMEM tables read via lpm.
461 ldi r18, lo8(keccak_rotate_codes)
462 ldi r19, hi8(keccak_rotate_codes)
464 ldi r18, lo8(rho_pi_idx_table)
465 ldi r19, hi8(rho_pi_idx_table)
; pm_lo8/pm_hi8 yield WORD (program-memory) addresses, as required for
; ijmp/icall dispatch through Z.
488 ldi r30, pm_lo8(byte_rot_jmp_table)
489 ldi r31, pm_hi8(byte_rot_jmp_table)
; Pick the single-bit rotate direction from the carry flag, which presumably
; holds bit 0 of the ROT_BIT code (0 = left, 1 = right) shifted out by the
; caller -- confirm against the shift instruction outside this view.
499 rotate64_nbit_autodir:
501 brcc rotate64_nbit_left
; carry set: bit 0 of the code was 1 -> rotate right
503 ldi r30, pm_lo8(rotate64_1bit_right)
504 ldi r31, pm_hi8(rotate64_1bit_right)
; carry clear path: rotate left
507 ldi r30, pm_lo8(rotate64_1bit_left)
508 ldi r31, pm_hi8(rotate64_1bit_left)
534 for(i = 0; i < 5; ++i){
535 a[i][0] ^= ((~(b[i][1])) & (b[i][2]));
536 a[i][1] ^= ((~(b[i][2])) & (b[i][3]));
537 a[i][2] ^= ((~(b[i][3])) & (b[i][4]));
538 a[i][3] ^= ((~(b[i][4])) & (b[i][0]));
539 a[i][4] ^= ((~(b[i][0])) & (b[i][1]));
544 ; X points at b + 32 + 8 = b + 40 = b[1][0]; it must be rewound to b[0][0]
; Rewind Y by 5*5*8 = 200 bytes, i.e. the whole 5x5 state of 64-bit lanes
; (subi-lo / sbci-hi 16-bit subtract-immediate pair).
548 subi YL, lo8(5 * 5 * 8)
549 sbci YH, hi8(5 * 5 * 8)
; Z -> keccak_rc_comp, the compressed iota round-constant table in flash.
578 ldi r30, lo8(keccak_rc_comp)
579 ldi r31, hi8(keccak_rc_comp)
; NOTE(review): this adc presumably propagates the carry of a preceding
; "add r30, <round index>" (outside this view) into the high byte of Z
; to index the current round's constant -- confirm against the full source.
581 adc r31, __zero_reg__
; Release the 200-byte scratch buffer (b[5][5] of 64-bit lanes) allocated
; with stack_alloc_large at function entry.
617 stack_free_large3 200