3 This file is part of the AVR-Crypto-Lib.
4 Copyright (C) 2012 Daniel Otte (daniel.otte@rub.de)
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * \email daniel.otte@rub.de
24 * \license GPLv3 or later
29 #include "avr-asm-macros.S"
43 .struct ctx_a + 8 * 5 * 5
50 .global rho_pi_idx_table
54 .byte (((2 * \j + 3 * \i) % 5) * 5 + \i) * 8
59 #define ROT_BIT(a) (( (a) <= 4) ? ((a) << 1) : (0x01 | ((8 - (a)) << 1)))
60 #define ROT_CODE(a) ((((a) / 8 + ((((a) % 8) > 4) ? 1 : 0)) << 4) | ROT_BIT(((a) % 8)))
62 const uint8_t keccak_rotate_codes[5][5] PROGMEM = {
63 { ROT_CODE( 0), ROT_CODE( 1), ROT_CODE(62), ROT_CODE(28), ROT_CODE(27) },
64 { ROT_CODE(36), ROT_CODE(44), ROT_CODE( 6), ROT_CODE(55), ROT_CODE(20) },
65 { ROT_CODE( 3), ROT_CODE(10), ROT_CODE(43), ROT_CODE(25), ROT_CODE(39) },
66 { ROT_CODE(41), ROT_CODE(45), ROT_CODE(15), ROT_CODE(21), ROT_CODE( 8) },
67 { ROT_CODE(18), ROT_CODE( 2), ROT_CODE(61), ROT_CODE(56), ROT_CODE(14) }
72 .byte 0x00, 0x02, 0x85, 0x38, 0x36 ; ROT_CODE of 0, 1, 62, 28, 27; code = ((a / 8, +1 if a % 8 > 4) << 4) | ROT_BIT(a % 8)
73 .byte 0x48, 0x58, 0x15, 0x73, 0x28 ; ROT_CODE of 36, 44, 6, 55, 20
74 .byte 0x06, 0x14, 0x56, 0x32, 0x53 ; ROT_CODE of 3, 10, 43, 25, 39
75 .byte 0x52, 0x67, 0x23, 0x37, 0x10 ; ROT_CODE of 41, 45, 15, 21, 8
76 .byte 0x24, 0x04, 0x87, 0x70, 0x25 ; ROT_CODE of 18, 2, 61, 56, 14
79 .byte 0x01, 0x92, 0xda, 0x70
80 .byte 0x9b, 0x21, 0xf1, 0x59
81 .byte 0x8a, 0x88, 0x39, 0x2a
82 .byte 0xbb, 0xcb, 0xd9, 0x53
83 .byte 0x52, 0xc0, 0x1a, 0x6a
84 .byte 0xf1, 0xd0, 0x21, 0x78
188 rjmp rotate64_1byte_left
189 rjmp rotate64_2byte_left
190 rjmp rotate64_3byte_left
191 rjmp rotate64_4byte_left
192 rjmp rotate64_5byte_left
193 rjmp rotate64_6byte_left
194 rjmp rotate64_7byte_left
198 void keccak_theta (uint64_t *a, uint64_t *b){
200 for(i = 0; i < 5; ++i){
201 b[i][0] = a[0][i] ^ a[1][i] ^ a[2][i] ^ a[3][i] ^ a[4][i];
206 /*********************************************
208 *********************************************
210 r24:r25 = a ; uint64_t a[5][5]
219 Z = r24:r25 + 7 + 4 * 40
226 .irp r, 0, 1, 2, 3, 4
239 /*********************************************
241 *********************************************
243 r24:r25 = a+1 ; uint64_t a[5][5]
246 a[0..4][0] ^= rol(b,1)
253 Z = r24:r25 + 7 + 4 * 40
266 .irp r, 0, 1, 2, 3, 4
277 add r19, __zero_reg__
280 .irp r, 0, 1, 2, 3, 4
292 ; a[i][j] = b[i][j] ^ ((~(b[i][(j + 1) % 5])) & (b[i][(j + 2) % 5]));
294 /*********************************************
296 *********************************************
302 a[0..7] ^= ~b[0..7] & c[0..7]
323 .global keccak_nextBlock
324 .func keccak_nextBlock
327 subi ZL, lo8(-ctx_bs)
328 sbci ZH, hi8(-ctx_bs)
348 stack_alloc_large 200, r26, r27
353 movw r30, r24 ; Z = a
382 for(i = 0; i < 5; ++i){
383 for(j = 0; j < 5; ++j){
384 a[j][i] ^= b[(4 + i) % 5][0];
389 /* a[0..4][0]{0..7} ^= b[4][0]{0..7} */
393 /* a[0..4][1]{0..7} ^= b[0][0]{0..7} */
394 subi XL, lo8(4 * 5 * 8 + 8)
395 sbci XH, hi8(4 * 5 * 8 + 8)
397 /* a[0..4][2]{0..7} ^= b[1][0]{0..7} */
400 /* a[0..4][3]{0..7} ^= b[2][0]{0..7} */
403 /* a[0..4][4]{0..7} ^= b[3][0]{0..7} */
407 for(i = 0; i < 5; ++i){
408 for(j = 0; j < 5; ++j){
409 a[j][i] ^= rotate64_1bit_left(b[(i + 1) % 5][0]);
413 /* a[0..4][0]{0..7} ^= rol(b[1][0]{0..7}) */
414 subi r24, lo8(5 * 8 - 1)
415 sbci r25, hi8(5 * 8 - 1)
416 subi XL, lo8(2 * 5 * 8 + 8)
417 sbci XH, hi8(2 * 5 * 8 + 8)
419 /* a[0..4][1]{0..7} ^= rol(b[2][0]{0..7}) */
422 /* a[0..4][2]{0..7} ^= rol(b[3][0]{0..7}) */
425 /* a[0..4][3]{0..7} ^= rol(b[4][0]{0..7}) */
428 /* a[0..4][4]{0..7} ^= rol(b[0][0]{0..7}) */
429 subi XL, lo8(4 * 5 * 8 + 8)
430 sbci XH, hi8(4 * 5 * 8 + 8)
436 for(i = 0; i < 5; ++i){
437 for(j = 0; j < 5; ++j){
438 b[(2 * i + 3 * j) % 5][j] =
439 rotate64left_code(a[j][i], pgm_read_byte(&(keccak_rotate_codes[i][j])));
445 const uint8_t* rot_code = (const uint8_t*)keccak_rotate_codes;
446 const uint8_t* idx_idx = (const uint8_t*)rho_pi_idx_table;
447 uint64_t *a_tmp = (uint64_t*)a;
448 for(i = 0; i < 25; ++i){
449 *((uint64_t*)(((uint8_t*)b) + pgm_read_byte(idx_idx++))) =
450 rotate64left_code(*a_tmp++, pgm_read_byte(rot_code++));
459 ldi r18, lo8(keccak_rotate_codes)
460 ldi r19, hi8(keccak_rotate_codes)
462 ldi r18, lo8(rho_pi_idx_table)
463 ldi r19, hi8(rho_pi_idx_table)
486 ldi r30, pm_lo8(byte_rot_jmp_table)
487 ldi r31, pm_hi8(byte_rot_jmp_table)
497 rotate64_nbit_autodir:
499 brcc rotate64_nbit_left
501 ldi r30, pm_lo8(rotate64_1bit_right)
502 ldi r31, pm_hi8(rotate64_1bit_right)
505 ldi r30, pm_lo8(rotate64_1bit_left)
506 ldi r31, pm_hi8(rotate64_1bit_left)
532 for(i = 0; i < 5; ++i){
533 a[i][0] ^= ((~(b[i][1])) & (b[i][2]));
534 a[i][1] ^= ((~(b[i][2])) & (b[i][3]));
535 a[i][2] ^= ((~(b[i][3])) & (b[i][4]));
536 a[i][3] ^= ((~(b[i][4])) & (b[i][0]));
537 a[i][4] ^= ((~(b[i][0])) & (b[i][1]));
542 ; X points at b + 32 + 8 = b + 40 = b[1][0], but it has to point to b[0][0]
546 subi YL, lo8(5 * 5 * 8)
547 sbci YH, hi8(5 * 5 * 8)
578 ldi r30, lo8(keccak_rc_comp)
579 ldi r31, hi8(keccak_rc_comp)
581 adc r31, __zero_reg__
617 stack_free_large3 200
626 .global keccak224_ctx2hash
627 .func keccak224_ctx2hash
635 .global keccak384_ctx2hash
636 .func keccak384_ctx2hash
644 .global keccak512_ctx2hash
645 .func keccak512_ctx2hash
653 .global keccak256_ctx2hash
654 .func keccak256_ctx2hash
662 void keccak_ctx2hash(void* dest, uint16_t length_b, keccak_ctx_t* ctx){
663 while(length_b>=ctx->r){
664 memcpy(dest, ctx->a, ctx->bs);
665 dest = (uint8_t*)dest + ctx->bs;
667 keccak_f1600(ctx->a);
669 memcpy(dest, ctx->a, (length_b+7)/8);
672 .global keccak_ctx2hash
673 .func keccak_ctx2hash
684 ldd r10, Z+3 ; load blocksize (in bytes)
686 ; length_b = (r9:r8) ; r = (r3:r2) ; (H:L)
729 .global keccak224_init
738 .global keccak384_init
747 .global keccak512_init
756 .global keccak256_init
764 void keccak_init(uint16_t r, keccak_ctx_t* ctx){
765 memset(ctx->a, 0x00, 5 * 5 * 8);
767 ctx->bs = (uint8_t)(r / 8);
793 void keccak_lastBlock(keccak_ctx_t* ctx, const void* block, uint16_t length_b){
796 while(length_b >= ctx->r){
797 keccak_nextBlock(ctx, block);
798 block = (uint8_t*)block + ctx->bs;
801 length_B = length_b / 8;
802 memxor(ctx->a, block, length_B);
805 / * we have some single bits * /
806 t = ((uint8_t*)block)[length_B] >> (8 - (length_b & 7));
807 t |= 0x01 << (length_b & 7);
811 ctx->a[length_B] ^= t;
812 if(length_b == ctx->r - 1){
813 keccak_f1600(ctx->a);
825 .global keccak_lastBlock
826 .func keccak_lastBlock
844 rcall keccak_nextBlock
875 /* we have trailing bits */