3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2012 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * \email daniel.otte@rub.de
24 * \license GPLv3 or later
29 #include "avr-asm-macros.S"
43 .struct ctx_a + 8 * 5 * 5
54 .global rho_pi_idx_table
58 .byte (((2 * \j + 3 * \i) % 5) * 5 + \i) * 8
/*
 * Encode a 64-bit rotation amount a (0..63) into a one-byte "rotation code":
 *   - high nibble = number of whole bytes to rotate left; rounded up by one
 *     when the residual bit count exceeds 4, so the bit part is always <= 4
 *   - bits 3..1   = number of single-bit rotations (0..4)
 *   - bit 0       = direction of the bit rotation (0 = left, 1 = right;
 *                   amounts > 4 are expressed as a right rotation of 8 - a
 *                   bits — cf. the brcc dispatch in rotate64_nbit_autodir)
 */
63 #define ROT_BIT(a) (( (a) <= 4) ? ((a) << 1) : (0x01 | ((8 - (a)) << 1)))
64 #define ROT_CODE(a) ((((a) / 8 + ((((a) % 8) > 4) ? 1 : 0)) << 4) | ROT_BIT(((a) % 8)))
/* Rho-step rotation offsets r[x][y] of Keccak-f[1600], stored in flash
 * pre-encoded with ROT_CODE into byte-rotation / bit-rotation form. */
66 const uint8_t keccak_rotate_codes[5][5] PROGMEM = {
67 { ROT_CODE( 0), ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) },
68 { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) },
69 { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) },
70 { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) },
71 { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) }
; Precomputed ROT_CODE bytes for the 5x5 rho rotation offsets
; (row 0 = ROT_CODE(0), ROT_CODE(1), ROT_CODE(62), ROT_CODE(28),
; ROT_CODE(27), etc.). NOTE(review): the owning label is not visible
; in this chunk -- confirm it is the assembled keccak_rotate_codes.
76 .byte 0x00, 0x02, 0x85, 0x38, 0x36
77 .byte 0x48, 0x58, 0x15, 0x73, 0x28
78 .byte 0x06, 0x14, 0x56, 0x32, 0x53
79 .byte 0x52, 0x67, 0x23, 0x37, 0x10
80 .byte 0x24, 0x04, 0x87, 0x70, 0x25
; 24 bytes = one byte per round of Keccak-f[1600]; loaded via the
; keccak_rc_comp label in the iota step. NOTE(review): appears to be a
; bit-compressed encoding of the 24 round constants RC[i] (each RC only
; has bits set at positions 2^j - 1) -- confirm the exact packing
; against the expansion code that reads keccak_rc_comp.
83 .byte 0x01, 0x92, 0xda, 0x70
84 .byte 0x9b, 0x21, 0xf1, 0x59
85 .byte 0x8a, 0x88, 0x39, 0x2a
86 .byte 0xbb, 0xcb, 0xd9, 0x53
87 .byte 0x52, 0xc0, 0x1a, 0x6a
88 .byte 0xf1, 0xd0, 0x21, 0x78
; Fragment of byte_rot_jmp_table (indexed via pm_lo8/pm_hi8 loads of the
; table address): each entry dispatches to the routine that rotates a
; 64-bit lane left by the given number of whole bytes.
192 rjmp rotate64_1byte_left ; rotate lane left by 1 byte
193 rjmp rotate64_2byte_left ; rotate lane left by 2 bytes
194 rjmp rotate64_3byte_left ; rotate lane left by 3 bytes
195 rjmp rotate64_4byte_left ; rotate lane left by 4 bytes
196 rjmp rotate64_5byte_left ; rotate lane left by 5 bytes
197 rjmp rotate64_6byte_left ; rotate lane left by 6 bytes
198 rjmp rotate64_7byte_left ; rotate lane left by 7 bytes
202 void keccak_theta (uint64_t *a, uint64_t *b){
204 for(i = 0; i < 5; ++i){
205 b[i][0] = a[0][i] ^ a[1][i] ^ a[2][i] ^ a[3][i] ^ a[4][i];
210 /*********************************************
212 *********************************************
214 r24:r25 = a ; uint64_t a[5][5]
223 Z = r24:r25 + 7 + 4 * 40
230 .irp r, 0, 1, 2, 3, 4
243 /*********************************************
245 *********************************************
247 r24:r25 = a+1 ; uint64_t a[5][5]
250 a[0..4][0] ^= rol(b,1)
257 Z = r24:r25 + 7 + 4 * 40
270 .irp r, 0, 1, 2, 3, 4
281 add r19, __zero_reg__
284 .irp r, 0, 1, 2, 3, 4
296 ; a[i][j] = b[i][j] ^ ((~(b[i][(j + 1) % 5])) & (b[i][(j + 2) % 5]));
298 /*********************************************
300 *********************************************
306 a[0..7] ^= ~b[0..7] & c[0..7]
327 .global keccak_nextBlock
328 .func keccak_nextBlock
331 subi ZL, lo8(-ctx_bs)
332 sbci ZH, hi8(-ctx_bs)
352 stack_alloc_large 200, r26, r27
357 movw r30, r24 ; Z = a
386 for(i = 0; i < 5; ++i){
387 for(j = 0; j < 5; ++j){
388 a[j][i] ^= b[(4 + i) % 5][0];
393 /* a[0..4][0]{0..7} ^= b[4][0]{0..7} */
397 /* a[0..4][1]{0..7} ^= b[0][0]{0..7} */
398 subi XL, lo8(4 * 5 * 8 + 8)
399 sbci XH, hi8(4 * 5 * 8 + 8)
401 /* a[0..4][2]{0..7} ^= b[1][0]{0..7} */
404 /* a[0..4][3]{0..7} ^= b[2][0]{0..7} */
407 /* a[0..4][4]{0..7} ^= b[3][0]{0..7} */
411 for(i = 0; i < 5; ++i){
412 for(j = 0; j < 5; ++j){
413 a[j][i] ^= rotate64_1bit_left(b[(i + 1) % 5][0]);
417 /* a[0..4][0]{0..7} ^= rol(b[1][0]{0..7}) */
418 subi r24, lo8(5 * 8 - 1)
419 sbci r25, hi8(5 * 8 - 1)
420 subi XL, lo8(2 * 5 * 8 + 8)
421 sbci XH, hi8(2 * 5 * 8 + 8)
423 /* a[0..4][1]{0..7} ^= rol(b[2][0]{0..7}) */
426 /* a[0..4][2]{0..7} ^= rol(b[3][0]{0..7}) */
429 /* a[0..4][3]{0..7} ^= rol(b[4][0]{0..7}) */
432 /* a[0..4][4]{0..7} ^= rol(b[0][0]{0..7}) */
433 subi XL, lo8(4 * 5 * 8 + 8)
434 sbci XH, hi8(4 * 5 * 8 + 8)
440 for(i = 0; i < 5; ++i){
441 for(j = 0; j < 5; ++j){
442 b[(2 * i + 3 * j) % 5][j] =
443 rotate64left_code(a[j][i], pgm_read_byte(&(keccak_rotate_codes[i][j])));
449 const uint8_t* rot_code = (const uint8_t*)keccak_rotate_codes;
450 const uint8_t* idx_idx = (const uint8_t*)rho_pi_idx_table;
451 uint64_t *a_tmp = (uint64_t*)a;
452 for(i = 0; i < 25; ++i){
453 *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) =
454 rotate64left_code(*a_tmp++, pgm_read_byte(rot_code++));
463 ldi r18, lo8(keccak_rotate_codes)
464 ldi r19, hi8(keccak_rotate_codes)
466 ldi r18, lo8(rho_pi_idx_table)
467 ldi r19, hi8(rho_pi_idx_table)
490 ldi r30, pm_lo8(byte_rot_jmp_table)
491 ldi r31, pm_hi8(byte_rot_jmp_table)
501 rotate64_nbit_autodir:
503 brcc rotate64_nbit_left
505 ldi r30, pm_lo8(rotate64_1bit_right)
506 ldi r31, pm_hi8(rotate64_1bit_right)
509 ldi r30, pm_lo8(rotate64_1bit_left)
510 ldi r31, pm_hi8(rotate64_1bit_left)
536 for(i = 0; i < 5; ++i){
537 a[i][0] ^= ((~(b[i][1])) & (b[i][2]));
538 a[i][1] ^= ((~(b[i][2])) & (b[i][3]));
539 a[i][2] ^= ((~(b[i][3])) & (b[i][4]));
540 a[i][3] ^= ((~(b[i][4])) & (b[i][0]));
541 a[i][4] ^= ((~(b[i][0])) & (b[i][1]));
546 ; X points at b + 32 + 8 = b + 40 = b[1][0] has to point to b[0][0]
550 subi YL, lo8(5 * 5 * 8)
551 sbci YH, hi8(5 * 5 * 8)
580 ldi r30, lo8(keccak_rc_comp)
581 ldi r31, hi8(keccak_rc_comp)
583 adc r31, __zero_reg__
619 stack_free_large3 200
628 void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){
629 while(length_b>=ctx->r){
630 memcpy(dest, ctx->a, ctx->bs);
631 dest = (uint8_t*)dest + ctx->bs;
633 keccak_f1600(ctx->a);
635 memcpy(dest, ctx->a, (length_b+7)/8);
638 .global keccak_ctx2hash
639 .func keccak_ctx2hash
650 ldd r10, Z+3 ; load blocksize (in bytes)