/* mqq160-sign_P-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2010 Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * \file     mqq160-sign_P-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte
 * \date     2010-03-21
 * \license  GPLv3 or later
 *
 */

/*
 * AVR assembly implementation of mqq160_sign_P and its two local helpers
 * (mqq_inv_affine_transformation and mqq_q). Only mqq160_sign_P is exported
 * with .global; the .global lines of the helpers are commented out.
 *
 * stack_alloc, stack_free, push_range and pop_range are not defined in this
 * file; presumably they are macros from avr-asm-macros.S -- their exact
 * register usage should be confirmed there.
 */
#include "avr-asm-macros.S"

/*
 * C reference for mqq_inv_affine_transformation, excluded from the build by
 * "#if 0" and kept as documentation of what the assembly below computes.
 */
#if 0
static void mqq_inv_affine_transformation(uint8_t* input_bytes, uint8_t* result, const mqq160_sign_key_t* key){
	/* The matrix SInv is given as two permutations of 160 elements. */
	uint8_t j, byteindex, bitindex, bitindex_d, byteindex_d, rp1, rp5;
	uint8_t *r1_ptr, *r5_ptr;
	uint8_t h1[20];

	/* Initialize H1 and H2 = 0 */
	memset(h1, 0, 20);
	memset(result, 0, 20);

	/*
	   Fill H1 with bits of InputBytes according to the RP1 permutation
	   and fill H2 with bits of InputBytes according to the RP5 permutation
	*/
	bitindex_d = 0x80;
	byteindex_d = 0;
	j=160;
	r1_ptr = key->rp1;
	r5_ptr = key->rp5;
	do{
		rp1 = pgm_read_byte(r1_ptr++);
		rp5 = pgm_read_byte(r5_ptr++);
		byteindex = rp1>>3;
		bitindex = 0x80 >> (rp1&0x07);
		if (input_bytes[byteindex] & bitindex){
			h1[byteindex_d] ^= bitindex_d;
		}

		byteindex = rp5>>3;
		bitindex = 0x80 >> (rp5&0x07);
		if (input_bytes[byteindex] & bitindex){
			result[byteindex_d] ^= bitindex_d;
		}
		bitindex_d >>= 1;
		if(bitindex_d==0){
			++byteindex_d;
			bitindex_d = 0x80;
		}
	}while(--j);

	for (j=0; j<20; j++){
		result[j] ^= h1[j] ^ h1[pgm_read_byte(j+mod20_table)]
		                   ^ h1[pgm_read_byte(8+j+mod20_table)]
		                   ^ h1[pgm_read_byte(12+j+mod20_table)];
	}
}
#endif

/*
 * fetch_bit
 *
 * Reads one bit-index byte from program memory at Z (post-incrementing Z)
 * and copies the addressed bit of the RAM buffer at r24:r25 into the T flag.
 * The upper five bits of the index select the byte (index >> 3), the lower
 * three bits select the bit counted from the MSB (mask 0x80 >> (index & 7)).
 *
 * in:       Z        pointer into program memory (advanced by one)
 *           r24:r25  base address of the buffer the bit is taken from
 * out:      T flag   set if the selected bit is 1, cleared otherwise
 * clobbers: r0, r28, r29 (Y is not preserved)
 */
fetch_bit:
	lpm r0, Z+          /* r0 = bit index from program memory */
	mov r28, r0
	ldi r29, 0x80       /* r29 = bit mask, starts at the MSB */
	andi r28, 7         /* r28 = index & 7 = number of right shifts */
	breq 3f
2:	lsr r29
	dec r28
	brne 2b
3:	mov r28, r0
	lsr r28             /* r28 = index >> 3 = byte offset */
	lsr r28
	lsr r28
	mov r0, r29         /* keep the mask in r0, r29 becomes the high byte of Y */
	clr r29
	add r28, r24        /* Y = buffer base + byte offset */
	adc r29, r25
	ld r28, Y
	clt
	and r28, r0
	breq 4f
	set
4:	ret

/*
 * Register equates for mqq_inv_affine_transformation.
 * Note that several names share a register: tmp_0/tmp_1 alias xrp5_0/xrp5_1,
 * tmp_2/tmp_3 alias inp_0/inp_1 and tmp_4 aliases xres_0. The tmp_* names are
 * only used in the final mixing loop.
 */
xres_0 = 18
xres_1 = 19
h_0 = 20
h_1 = 21
xrp5_0 = 22
xrp5_1 = 23
inp_0 = 24
inp_1 = 25
tmp_0 = 22
tmp_1 = 23
tmp_2 = 24
tmp_3 = 25
tmp_4 = 18

/*
 * mqq_inv_affine_transformation
 *
 * param input_bytes:  r24:r25
 * param result:       r22:r23
 * param key:          r20:r21
 *
 * Assembly counterpart of the C reference above. The key structure is read
 * from RAM through Y; the 16-bit pointers at offset 6 (rp1) and offset 8
 * (rp5) are used as program-memory addresses by fetch_bit.
 *
 * Steps:
 *   1. build h1[20] on the stack from input_bytes permuted by rp1
 *   2. build result[20] from input_bytes permuted by rp5
 *   3. result[j] ^= h1[j] ^ three further h1 bytes selected by affine_mix_lut
 *
 * r17 is saved and restored. Y (r28:r29) is clobbered and NOT saved here,
 * the push/pop pairs are commented out; the only caller in this file,
 * mqq160_sign_P, saves r28/r29 with push_range. r1 is used as a bit
 * accumulator in steps 1 and 2 and is cleared again before step 3.
 */
;.global mqq_inv_affine_transformation
mqq_inv_affine_transformation:
	push r17
;	push r28
;	push r29
	stack_alloc 20
	adiw r30, 1 /* Z points to stack space for h1 */
	movw r28, r20 /* Y points to the key struct in RAM */
	movw xres_0, r22
	movw r26, r30 /* X points to h1[0] */
	ldd xrp5_0, Y+8 /* load pointer rp5 to xrp5 */
	ldd xrp5_1, Y+9
	movw h_0, r30
	ldd r30, Y+6 /* load pointer to rp1 in Z */
	ldd r31, Y+7
	ldi r17, 20
	/* step 1: 20 bytes of h1, eight fetched bits per byte, MSB first */
20:	rcall fetch_bit
	bld r1, 7
	rcall fetch_bit
	bld r1, 6
	rcall fetch_bit
	bld r1, 5
	rcall fetch_bit
	bld r1, 4
	rcall fetch_bit
	bld r1, 3
	rcall fetch_bit
	bld r1, 2
	rcall fetch_bit
	bld r1, 1
	rcall fetch_bit
	bld r1, 0
	st X+, r1
	dec r17
	brne 20b
;----
	/* step 2: same construction for result[], driven by rp5 */
	movw r26, xres_0 /* X points to result */
	movw r30, xrp5_0
	ldi r17, 20
20:	rcall fetch_bit
	bld r1, 7
	rcall fetch_bit
	bld r1, 6
	rcall fetch_bit
	bld r1, 5
	rcall fetch_bit
	bld r1, 4
	rcall fetch_bit
	bld r1, 3
	rcall fetch_bit
	bld r1, 2
	rcall fetch_bit
	bld r1, 1
	rcall fetch_bit
	bld r1, 0
	st X+, r1
	dec r17
	brne 20b
	clr r1              /* r1 is used as zero in "adc r29, r1" below */
; --- now we mix result with h1
	sbiw r26, 20 /* adjusting X to point at result[0] */
	movw tmp_2, h_0     /* tmp_2:tmp_3 walks through h1[j] */
	ldi r30, lo8(affine_mix_lut)
	ldi r31, hi8(affine_mix_lut)
	ldi r17, 20
30:	ld tmp_0, X         /* tmp_0 = result[j] */
	movw r28, tmp_2
	ld tmp_1, Y+        /* tmp_1 = h1[j] */
	movw tmp_2, r28
	eor tmp_0, tmp_1
	movw r28, h_0       /* Y = &h1[0] */
	lpm r0, Z+          /* r0 = affine_mix_lut[j] */
	mov tmp_4, r0
	andi tmp_4, 0x0f    /* low nibble: index of the first extra h1 byte */
	add r28, tmp_4
	adc r29, r1
	ld tmp_1, Y
	eor tmp_0, tmp_1
	adiw r28, 4         /* second byte: +4, or +8 if bit 7 of the LUT entry is set */
	sbrc r0, 7
	adiw r28, 4
	ld tmp_1, Y
	eor tmp_0, tmp_1
	adiw r28, 4         /* third byte: +4, or +8 if bit 6 of the LUT entry is set */
	sbrc r0, 6
	adiw r28, 4
	ld tmp_1, Y
	eor tmp_0, tmp_1
	st X+, tmp_0
	dec r17
	brne 30b
	stack_free 20
;	pop r29
;	pop r28
	pop r17
	ret

/*
 * Lookup table for the mixing loop above, one byte per result index j and
 * read with lpm:
 *   bits 3..0  index of the first extra h1 byte
 *   bit 7      step from the first to the second index is 8 instead of 4
 *   bit 6      step from the second to the third index is 8 instead of 4
 */
affine_mix_lut:
	.byte 0x84, 0x85, 0x86, 0x87
	.byte 0xC0, 0xC1, 0xC2, 0xC3
	.byte 0x40, 0x41, 0x42, 0x43
	.byte 0x44, 0x45, 0x46, 0x47
	.byte 0x80, 0x81, 0x82, 0x83

/******************************************************************************/

/*
 * Register equates for mqq_q. tmp_0 .. tmp_3 are redefined here and differ
 * from the values used by mqq_inv_affine_transformation.
 */
xres = 20
tmp_0 = 23
tmp_1 = 22
tmp_2 = 21
tmp_3 = 19

/*
 * mqq_q
 *
 * param i:    r24
 * param b1:   r22
 * param b2:   r20
 * param key:  r18:r19
 *
 * In addition X (r26:r27) must point to 25 bytes of scratch RAM supplied by
 * the caller: e[0..8] at X and eight 16-bit words a[0..7] at X+9. The local
 * stack allocation is commented out.
 *
 * The key structure is read from RAM: the pointer at offset 0 addresses the
 * table called a[] in program memory, and the pointer at offset 2 (i odd) or
 * offset 4 (i even) addresses the 9 initial bytes of e[].
 *
 * Result is returned in r24. Y is clobbered and not saved. r1 is used as a
 * shift counter in the elimination loop and is zero again whenever that
 * counter loop is left.
 */
;.global mqq_q
mqq_q:
;	push r28
;	push r29
;	stack_alloc 25, r26, r27
;	adiw r26, 1 /* X points to e[0] */
	movw r28, r18
	sbrs r24, 0         /* i odd: keep Y, pointer at offset 2 is used */
	adiw r28, 2         /* i even: pointer at offset 4 is used */
	ldd r30, Y+2
	ldd r31, Y+3
	ldi r28, 9
	/* copy 9 bytes from program memory into e[0..8] */
10:	lpm r0, Z+
	st X+, r0
	dec r28
	brne 10b
	sbiw r26, 9 /* adjust X to point at e[0] */
;---
	movw r28, r18
	ld r30, Y+ /* Z points to a[0] in progmem */
	ld r31, Y
	sbrs r24, 0
	rjmp 40f            /* i even: use the loop at 40 */
	/*
	 * i odd: for every set bit of b1 (MSB first) XOR 9 table bytes into e[],
	 * reading the table with a stride of 9; Z advances by 1 per bit of b1.
	 * The loop ends as soon as the remaining bits of b1 are all zero.
	 */
20:	sbrs r22, 7
	rjmp 30f
	ldi r25, 9
	movw r28, r30       /* save Z */
25:	lpm r0, Z
	adiw r30, 9
	ld r24, X
	eor r24, r0
	st X+, r24
	dec r25
	brne 25b
	movw r30, r28       /* restore Z */
	sbiw r26, 9
30:	adiw r30, 1
	lsl r22
	breq 60f
	rjmp 20b
	/*
	 * i even: for every set bit of b1 (MSB first) XOR 9 consecutive table
	 * bytes into e[]; Z advances by 9 per bit of b1. Same exit condition.
	 */
40:	sbrs r22, 7
	rjmp 50f
	ldi r25, 9
	movw r28, r30       /* save Z */
45:	lpm r0, Z+
	ld r24, X
	eor r24, r0
	st X+, r24
	dec r25
	brne 45b
	movw r30, r28       /* restore Z */
	sbiw r26, 9
50:	adiw r30, 9
	lsl r22
	breq 60f
	rjmp 40b
60:
;------ all inputs are consumed, X points at e[0]
;------ So we finished with obtaining e0 .. e7 and e8
	movw r28, r26
	ldd r0, Y+8
	eor xres, r0        /* xres = b2 ^ e[8] */
;---

/*
   We can look at the bits of e0 .. e7 as the columns of a given matrix. We want
   to define 8 variables that hold the rows of that matrix. The variables need
   to be 16-bit because we will put the bits of e0 .. e7 into the upper 8 bits,
   and the bits of the variable result will be the Least Significant Bits of
   a[0] ... a[7].
*/
	adiw r28, 9 /* Y points at a[0] */
	ldi r25, 8
63:
	ldi r24, 8
	clr tmp_0
	/* shift the current MSB out of each of e[0..7] and collect them in tmp_0 */
65:	ld tmp_1, X
	lsl tmp_1
	st X+, tmp_1
	rol tmp_0
	dec r24
	brne 65b
;---
	clr tmp_1
	lsl xres            /* next bit of xres goes into bit 0 of the low byte */
	rol tmp_1
	st Y+, tmp_1        /* low byte of a[row]: result bit */
	st Y+, tmp_0        /* high byte of a[row]: matrix row */
	sbiw r26, 8
	dec r25
	brne 63b
;------- First we apply upper triangular transformation
	sbiw r28, 16 /* Y points at a[0] */
	movw r30, r28 /* Z points at a[0] */
col = 25
	ldi r24, 8
	clr col
70:
	mov r1, col
	ldi tmp_3, 0x80     /* tmp_3 = 0x80 >> col, mask of the pivot column */
	tst r1
	breq 72f
71:	lsr tmp_3
	dec r1
	brne 71b
72:
	clt
	movw r28, r30 /* Y points at a[row]*/
	/*
	 * Search downwards for a row with a 1 in the pivot column.
	 * NOTE(review): there is no upper bound on the row here; this looks
	 * like it relies on the matrix being invertible -- confirm.
	 */
73:	ldd tmp_0, Y+1
	and tmp_0, tmp_3
	brne 74f
	set                 /* T = 1: pivot is not in the current row */
	adiw r28, 2
	rjmp 73b
74:
	/* Y points at a[row] */
	/* if T is set we have to permute [Y] and [Z] */
	brtc 75f
	ld tmp_0, Y
	ld tmp_1, Z
	st Y, tmp_1
	st Z, tmp_0
	ldd tmp_0, Y+1
	ldd tmp_1, Z+1
	std Y+1, tmp_1
	std Z+1, tmp_0
75: /* permutation done */
	ldi r26, 7          /* r26 = 7 - col = number of rows below the pivot */
	sub r26, col
	breq 78f
	movw r28, r30
	/* XOR the pivot row into every lower row that has a 1 in this column */
76:	adiw r28, 2
	ldd tmp_0, Y+1
	and tmp_0, tmp_3
	breq 77f
	ld tmp_0, Y
	ld tmp_1, Z
	eor tmp_0, tmp_1
	st Y, tmp_0
	ldd tmp_0, Y+1
	ldd tmp_1, Z+1
	eor tmp_0, tmp_1
	std Y+1, tmp_0
77:
	dec r26
	brne 76b
78:
	adiw r30, 2
	inc col
	dec r24
	brne 70b
79:
;------ Then we eliminate 1s above the main diagonal
	ldi col, 7
	ldi tmp_3, 1        /* mask of the last column, shifted left per step */
	sbiw r30, 2         /* Z points at a[7] */
80:
	movw r28, r30
	mov r26, col        /* number of rows above the current one */
81:
	sbiw r28, 2
	ldd tmp_0, Y+1
	and tmp_0, tmp_3
	breq 82f
	ld tmp_0, Y
	ld tmp_1, Z
	eor tmp_0, tmp_1
	st Y, tmp_0
	ldd tmp_0, Y+1
	ldd tmp_1, Z+1
	eor tmp_0, tmp_1
	std Y+1, tmp_0
82:
	dec r26
	brne 81b
	sbiw r30, 2
	lsl tmp_3
	dec col
	brne 80b
89:
;------ The result is in the Least Significant Bits of a[0] ... a[7]
	/* Z should point at a[0] */
	ldi r25, 8
	clr r24
	/* collect bit 0 of each low byte; the bit of a[0] ends up in bit 7 of r24 */
90:
	ld tmp_0, Z
	adiw r30, 2
	lsr tmp_0
	rol r24
	dec r25
	brne 90b
mqq_q_exit:
;	stack_free 25
;	pop r29
;	pop r28
	ret

/******************************************************************************/

/*
 * mqq160_sign_P
 *
 * param dest:  r24:r25
 * param hash:  r22:r23
 * param key:   r20:r21
 *
 * The key pointer is used as a program-memory address: 10 bytes are copied
 * with lpm into a stack buffer, and that RAM copy is what the helpers above
 * receive as their key argument. The same stack allocation also holds the
 * 20 byte array r1[].
 *
 * Steps:
 *   1. mqq_inv_affine_transformation(hash, dest, &key)
 *   2. r1[0] = dest[0]; for i = 1 .. 19: r1[i] = mqq_q(i, r1[i-1], dest[i], &key)
 *   3. XOR r1[] with 20 bytes assembled from 40 nibbles read through the
 *      program-memory pointer at key offset 8 (rp5)
 *   4. mqq_inv_affine_transformation(r1, dest, &key)
 *
 * r2 .. r11 and r28/r29 are saved with push_range and restored with
 * pop_range.
 */
dest_0 = 2
dest_1 = 3
xr1_0 = 4
xr1_1 = 5
key_0 = 6
key_1 = 7
i = 8
c = 9
qstack_0 = 10
qstack_1 = 11

.global mqq160_sign_P
mqq160_sign_P:
	push_range 2, 11
	push_range 28, 29
	stack_alloc 10+20, r26, r27 /* r1[20] + key */
	adiw r26, 1 /* X points to stack memory */
	movw key_0, r26
	/* load key structure */
	movw r30, r20
	ldi r18, 10
10:	lpm r0, Z+
	st X+, r0
	dec r18
	brne 10b
	movw xr1_0, r26     /* r1[] starts right after the 10 key bytes */
	movw dest_0, r24
	/* call to mqq_inv_affine_transformation(hash, dest, &key); */
	movw r24, r22
	movw r22, dest_0
	movw r20, key_0
	rcall mqq_inv_affine_transformation
	/* r1[0]=((uint8_t*)dest)[0]; */
	movw r26, dest_0
	movw r30, xr1_0
	ld r0, X
	st Z, r0
;----
	ldi r18, 19
	mov c, r18          /* c = 19 remaining iterations */
	clr i
	inc i               /* i = 1 */
	stack_alloc 25, r28, r29
	adiw r28, 1
	movw qstack_0, r28  /* 25 byte scratch area handed to mqq_q in X */
20:	mov r24, i
	movw r26, xr1_0
	add r26, i
	adc r27, r1
	sbiw r26, 1
	ld r22, X           /* b1 = r1[i-1] */
	movw r26, dest_0
	add r26, i
	adc r27, r1
	ld r20, X           /* b2 = dest[i] */
	movw r18, key_0
	movw r26, qstack_0
	rcall mqq_q
	movw r26, xr1_0
	add r26, i
	adc r27, r1
	st X, r24           /* r1[i] = return value of mqq_q */
	inc i
	dec c
	brne 20b
	stack_free 25
;-----
	movw r28, key_0
	ldd r30, Y+8        /* Z = pointer at key offset 8 (rp5) */
	ldd r31, Y+9
	movw r26, xr1_0
	ldi r18, 20
	/* r1[k] ^= (low nibble of first byte << 4) | (low nibble of second byte) */
30:	lpm r20, Z+
	swap r20
	andi r20, 0xF0
	lpm r21, Z+
	andi r21, 0x0F
	or r20, r21
	ld r21, X
	eor r21, r20
	st X+, r21
	dec r18
	brne 30b
;----
	/* call to mqq_inv_affine_transformation(r1, dest, &key) */
	movw r24, xr1_0
	movw r22, dest_0
	movw r20, key_0
	rcall mqq_inv_affine_transformation
	stack_free 30
	pop_range 28, 29
	pop_range 2, 11
	ret