3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2012 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * \email daniel.otte@rub.de
24 * \license GPLv3 or later
29 #include "avr-asm-macros.S"
43 .struct ctx_a + 8 * 5 * 5
50 .global rho_pi_idx_table
54 .byte (((2 * \j + 3 * \i) % 5) * 5 + \i) * 8
59 #define ROT_BIT(a) (( (a) <= 4) ? ((a) << 1) : (0x01 | ((8 - (a)) << 1)))
60 #define ROT_CODE(a) ((((a) / 8 + ((((a) % 8) > 4) ? 1 : 0)) << 4) | ROT_BIT(((a) % 8)))
62 const uint8_t keccak_rotate_codes[5][5] PROGMEM = {
63 { ROT_CODE( 0), ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) },
64 { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) },
65 { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) },
66 { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) },
67 { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) }
72 .byte 0x00, 0x02, 0x85, 0x38, 0x36
73 .byte 0x48, 0x58, 0x15, 0x73, 0x28
74 .byte 0x06, 0x14, 0x56, 0x32, 0x53
75 .byte 0x52, 0x67, 0x23, 0x37, 0x10
76 .byte 0x24, 0x04, 0x87, 0x70, 0x25
79 .byte 0x01, 0x92, 0xda, 0x70
80 .byte 0x9b, 0x21, 0xf1, 0x59
81 .byte 0x8a, 0x88, 0x39, 0x2a
82 .byte 0xbb, 0xcb, 0xd9, 0x53
83 .byte 0x52, 0xc0, 0x1a, 0x6a
84 .byte 0xf1, 0xd0, 0x21, 0x78
188 rjmp rotate64_1byte_left
189 rjmp rotate64_2byte_left
190 rjmp rotate64_3byte_left
191 rjmp rotate64_4byte_left
192 rjmp rotate64_5byte_left
193 rjmp rotate64_6byte_left
194 rjmp rotate64_7byte_left
198 void keccak_theta (uint64_t *a, uint64_t *b){
200 for(i = 0; i < 5; ++i){
201 b[i][0] = a[0][i] ^ a[1][i] ^ a[2][i] ^ a[3][i] ^ a[4][i];
206 /*********************************************
208 *********************************************
210 r24:r25 = a ; uint64_t a[5][5]
219 Z = r24:r25 + 7 + 4 * 40
226 .irp r, 0, 1, 2, 3, 4
239 /*********************************************
241 *********************************************
243 r24:r25 = a+1 ; uint64_t a[5][5]
246 a[0..4][0] ^= rol(b,1)
253 Z = r24:r25 + 7 + 4 * 40
266 .irp r, 0, 1, 2, 3, 4
277 add r19, __zero_reg__
280 .irp r, 0, 1, 2, 3, 4
292 ; a[i][j] = b[i][j] ^ ((~(b[i][(j + 1) % 5])) & (b[i][(j + 2) % 5]));
294 /*********************************************
296 *********************************************
302 a[0..7] ^= ~b[0..7] & c[0..7]
323 .global keccak_nextBlock
324 .func keccak_nextBlock
327 subi ZL, lo8(-ctx_bs)
328 sbci ZH, hi8(-ctx_bs)
348 stack_alloc_large 200, r26, r27
353 movw r30, r24 ; Z = a
382 for(i = 0; i < 5; ++i){
383 for(j = 0; j < 5; ++j){
384 a[j][i] ^= b[(4 + i) % 5][0];
389 /* a[0..4][0]{0..7} ^= b[4][0]{0..7} */
393 /* a[0..4][1]{0..7} ^= b[0][0]{0..7} */
394 subi XL, lo8(4 * 5 * 8 + 8)
395 sbci XH, hi8(4 * 5 * 8 + 8)
397 /* a[0..4][2]{0..7} ^= b[1][0]{0..7} */
400 /* a[0..4][3]{0..7} ^= b[2][0]{0..7} */
403 /* a[0..4][4]{0..7} ^= b[3][0]{0..7} */
407 for(i = 0; i < 5; ++i){
408 for(j = 0; j < 5; ++j){
409 a[j][i] ^= rotate64_1bit_left(b[(i + 1) % 5][0]);
413 /* a[0..4][0]{0..7} ^= rol(b[1][0]{0..7}) */
414 subi r24, lo8(5 * 8 - 1)
415 sbci r25, hi8(5 * 8 - 1)
416 subi XL, lo8(2 * 5 * 8 + 8)
417 sbci XH, hi8(2 * 5 * 8 + 8)
419 /* a[0..4][1]{0..7} ^= rol(b[2][0]{0..7}) */
422 /* a[0..4][2]{0..7} ^= rol(b[3][0]{0..7}) */
425 /* a[0..4][3]{0..7} ^= rol(b[4][0]{0..7}) */
428 /* a[0..4][4]{0..7} ^= rol(b[0][0]{0..7}) */
429 subi XL, lo8(4 * 5 * 8 + 8)
430 sbci XH, hi8(4 * 5 * 8 + 8)
434 for(i = 0; i < 5; ++i){
435 for(j = 0; j < 5; ++j){
436 b[(2 * i + 3 * j) % 5][j] =
437 rotate64left_code(a[j][i], pgm_read_byte(&(keccak_rotate_codes[i][j])));
443 const uint8_t *rot_code = (const uint8_t*)keccak_rotate_codes;
444 const uint8_t *idx_idx = (const uint8_t*)rho_pi_idx_table;
445 uint64_t *a_tmp = (uint64_t*)a;
446 for(i = 0; i < 25; ++i){
447 *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) =
448 rotate64left_code(*a_tmp++, pgm_read_byte(rot_code++));
457 ldi r18, lo8(keccak_rotate_codes)
458 ldi r19, hi8(keccak_rotate_codes)
460 ldi r18, lo8(rho_pi_idx_table)
461 ldi r19, hi8(rho_pi_idx_table)
484 ldi r30, pm_lo8(byte_rot_jmp_table)
485 ldi r31, pm_hi8(byte_rot_jmp_table)
495 rotate64_nbit_autodir:
497 brcc rotate64_nbit_left
499 ldi r30, pm_lo8(rotate64_1bit_right)
500 ldi r31, pm_hi8(rotate64_1bit_right)
503 ldi r30, pm_lo8(rotate64_1bit_left)
504 ldi r31, pm_hi8(rotate64_1bit_left)
530 for(i = 0; i < 5; ++i){
531 a[i][0] ^= ((~(b[i][1])) & (b[i][2]));
532 a[i][1] ^= ((~(b[i][2])) & (b[i][3]));
533 a[i][2] ^= ((~(b[i][3])) & (b[i][4]));
534 a[i][3] ^= ((~(b[i][4])) & (b[i][0]));
535 a[i][4] ^= ((~(b[i][0])) & (b[i][1]));
540 ; X points at b + 32 + 8 = b + 40 = b[1][0], but it has to point to b[0][0]
544 subi YL, lo8(5 * 5 * 8)
545 sbci YH, hi8(5 * 5 * 8)
576 ldi r30, lo8(keccak_rc_comp)
577 ldi r31, hi8(keccak_rc_comp)
579 adc r31, __zero_reg__
615 stack_free_large3 200
624 .global keccak224_ctx2hash
625 .func keccak224_ctx2hash
633 .global keccak384_ctx2hash
634 .func keccak384_ctx2hash
642 .global keccak512_ctx2hash
643 .func keccak512_ctx2hash
651 .global keccak256_ctx2hash
652 .func keccak256_ctx2hash
660 void keccak_ctx2hash(void *dest, uint16_t length_b, keccak_ctx_t *ctx){
661 while(length_b>=ctx->r){
662 memcpy(dest, ctx->a, ctx->bs);
663 dest = (uint8_t*)dest + ctx->bs;
665 keccak_f1600(ctx->a);
667 memcpy(dest, ctx->a, (length_b+7)/8);
670 .global keccak_ctx2hash
671 .func keccak_ctx2hash
682 ldd r10, Z+3 ; load blocksize (in bytes)
684 ; length_b = (r9:r8) ; r = (r3:r2) ; (H:L)
726 .global keccak224_init
735 .global keccak384_init
744 .global keccak512_init
753 .global keccak256_init
761 void keccak_init(uint16_t r, keccak_ctx_t *ctx){
762 memset(ctx->a, 0x00, 5 * 5 * 8);
764 ctx->bs = (uint8_t)(r / 8);
790 void keccak_lastBlock(keccak_ctx_t *ctx, const void *block, uint16_t length_b){
793 while(length_b >= ctx->r){
794 keccak_nextBlock(ctx, block);
795 block = (uint8_t*)block + ctx->bs;
798 length_B = length_b / 8;
799 memxor(ctx->a, block, length_B);
802 / * we have some single bits * /
803 t = ((uint8_t*)block)[length_B] >> (8 - (length_b & 7));
804 t |= 0x01 << (length_b & 7);
808 ctx->a[length_B] ^= t;
809 if(length_b == ctx->r - 1){
810 keccak_f1600(ctx->a);
822 .global keccak_lastBlock
823 .func keccak_lastBlock
841 rcall keccak_nextBlock
871 /* we have trailing bits */