X-Git-Url: https://git.cryptolib.org/?p=avr-crypto-lib.git;a=blobdiff_plain;f=serpent%2Fserpent-sboxes-bitslice-asm.S;fp=serpent%2Fserpent-sboxes-bitslice-asm.S;h=4e0d7db01b6fd5bb06bd2dfbba14efef297a49bd;hp=0000000000000000000000000000000000000000;hb=d32eba56ce10ea6b9eff123b50d9842673b38f2b;hpb=8f855d283a31a468ea014774c4723a8b77b81644

diff --git a/serpent/serpent-sboxes-bitslice-asm.S b/serpent/serpent-sboxes-bitslice-asm.S
new file mode 100644
index 0000000..4e0d7db
--- /dev/null
+++ b/serpent/serpent-sboxes-bitslice-asm.S
@@ -0,0 +1,854 @@
+/* serpent-sboxes-bitslice-asm.S */
+/*
+    This file is part of the AVR-Crypto-Lib.
+    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see https://www.gnu.org/licenses/.
+*/
+/* serpent-sboxes-bitslice-asm.S
+ * a bitsliced implementation of the serpent sboxes
+ * author: Daniel Otte
+ * email: daniel.otte@rub.de
+ * license: GPLv3
+ */
+
+#include "avr-asm-macros.S"
+IN0 = 22 /* IN0..IN3: registers r22..r25, the four input bytes of an S-box routine */
+IN1 = 23
+IN2 = 24
+IN3 = 25
+OUT0 = 18 /* OUT0..OUT3: registers r18..r21, the four output bytes of an S-box routine */
+OUT1 = 19
+OUT2 = 20
+OUT3 = 21
+T00 = 2 /* NOTE(review): T00 is assigned again on the next line; presumably the later value (3) is the one in effect and r2 ends up unused - confirm */
+T00 = 3 /* T00..T17: scratch registers for intermediate values of the S-box routines */
+T01 = 4
+T02 = 5
+T03 = 6
+T04 = 7
+T05 = 8
+T06 = 9
+T07 = 10
+T08 = 11
+T09 = 12
+T10 = 13
+T11 = 14
+T12 = 15
+T13 = 16
+T14 = 17
+T15 = 26 /* T15/T16 are r26/r27; sbox128 copies the routine address from r26:r27 into Z before any S-box routine runs */
+T16 = 27
+T17 = 0 /* r0 */
+
+/* S0: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12 */
+
+/* depth = 5,7,4,2, Total gates=18 */
+sb0: /* reads IN0..IN3 (never written here), leaves the result in OUT0..OUT3, T registers are scratch */
+	mov T00, IN1 /* pattern used throughout: copy one operand, then combine in place with the second */
+	eor T00, IN2
+	mov T01, IN0
+	or T01, IN3
+	mov T02, IN0
+	eor T02, IN1
+	mov OUT3, T01
+	eor OUT3, T00
+	mov T04, IN2
+	or T04, OUT3 /* finished outputs are reused as inputs of later gates */
+	mov T05, IN0
+	eor T05, IN3
+	mov T06, IN1
+	or T06, IN2
+	mov T07, IN3
+	and T07, T04
+	mov T08, T02
+	and T08, T06
+	mov OUT2, T08
+	eor OUT2, T07
+	mov T10, T08
+	and T10, OUT2
+	mov T11, IN2
+	eor T11, IN3
+	mov T12, T06
+	eor T12, T10
+	mov T13, IN1
+	and T13, T05
+	mov T14, T05
+	eor T14, T12
+	mov OUT0, T14
+	com OUT0 /* com = bitwise NOT of the register */
+	mov T16, OUT0
+	eor T16, T13
+	mov OUT1, T11
+	eor OUT1, T16
+	ret
+
+
+/* InvS0: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2 */
+
+/* depth = 8,4,3,6, Total gates=19 */
+sb0_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN2
+	eor T00, IN3
+	mov T01, IN0
+	or T01, IN1
+	mov T02, IN1
+	or T02, IN2
+	mov T03, IN2
+	and T03, T00
+	mov T04, T01
+	eor T04, T00
+	mov T05, IN0
+	or T05, T03
+	mov OUT2, T04
+	com OUT2
+	mov T07, IN1
+	eor T07, IN3
+	mov T08, T02
+	and T08, T07
+	mov T09, IN3
+	or T09, OUT2
+	mov OUT1, T08
+	eor OUT1, T05
+	mov T11, IN0
+	or T11, T04
+	mov T12, OUT1
+	eor T12, T11
+	mov T13, T02
+	eor T13, T09
+	mov T14, IN0
+	eor T14, IN2
+	mov OUT3, T13
+	eor OUT3, T12
+	mov T16, T04
+	and T16, T12
+	mov T17, T13
+	or T17, T16
+	mov OUT0, T14
+	eor OUT0, T17
+	ret
+
+
+/* S1: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4 */
+
+/* depth = 10,7,3,5, Total gates=18 */
+sb1: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	or T00, IN3
+	mov T01, IN2
+	eor T01, IN3
+	mov T02, IN1
+	com T02
+	mov T03, IN0
+	eor T03, IN2
+	mov T04, IN0
+	or T04, T02
+	mov T05, IN3
+	and T05, T03
+	mov T06, T00
+	and T06, T01
+	mov T07, IN1
+	or T07, T05
+	mov OUT2, T01
+	eor OUT2, T04
+	mov T09, T06
+	eor T09, T07
+	mov T10, T00
+	eor T10, T09
+	mov T11, OUT2
+	eor T11, T10
+	mov T12, IN1
+	and T12, IN3
+	mov OUT3, T09
+	com OUT3
+	mov OUT1, T12
+	eor OUT1, T11
+	mov T15, T09
+	or T15, OUT1
+	mov T16, T04
+	and T16, T15
+	mov OUT0, IN2
+	eor OUT0, T16
+	ret
+
+
+/* InvS1: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0 */
+
+/* depth = 7,4,5,3, Total gates=18 */
+sb1_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	eor T00, IN1
+	mov T01, IN1
+	or T01, IN3
+	mov T02, IN0
+	and T02, IN2
+	mov T03, IN2
+	eor T03, T01
+	mov T04, IN0
+	or T04, T03
+	mov T05, T00
+	and T05, T04
+	mov T06, IN3
+	or T06, T02
+	mov T07, IN1
+	eor T07, T05
+	mov T08, T06
+	eor T08, T05
+	mov T09, T03
+	or T09, T02
+	mov T10, IN3
+	and T10, T07
+	mov OUT2, T08
+	com OUT2
+	mov OUT1, T09
+	eor OUT1, T10
+	mov T13, IN0
+	or T13, OUT2
+	mov T14, T05
+	eor T14, OUT1
+	mov OUT3, T00
+	eor OUT3, T03
+	mov T16, IN2
+	eor T16, T14
+	mov OUT0, T13
+	eor OUT0, T16
+	ret
+
+/* S2: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2 */
+
+/* depth = 3,8,11,7, Total gates=16 */
+sb2: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	or T00, IN2
+	mov T01, IN0
+	eor T01, IN1
+	mov T02, IN3
+	eor T02, T00
+	mov OUT0, T01
+	eor OUT0, T02
+	mov T04, IN2
+	eor T04, OUT0
+	mov T05, IN1
+	eor T05, T04
+	mov T06, IN1
+	or T06, T04
+	mov T07, T00
+	and T07, T05
+	mov T08, T02
+	eor T08, T06
+	mov T09, T01
+	or T09, T08
+	mov OUT1, T09
+	eor OUT1, T07
+	mov T11, IN0
+	or T11, IN3
+	mov T12, T08
+	eor T12, OUT1
+	mov T13, IN1
+	eor T13, T12
+	mov OUT3, T08
+	com OUT3
+	mov OUT2, T11
+	eor OUT2, T13
+	ret
+
+/* InvS2: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7 */
+
+/* depth = 3,6,8,3, Total gates=18 */
+sb2_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	eor T00, IN3
+	mov T01, IN2
+	eor T01, IN3
+	mov T02, IN0
+	and T02, IN2
+	mov T03, IN1
+	or T03, T01
+	mov OUT0, T00
+	eor OUT0, T03
+	mov T05, IN0
+	or T05, IN2
+	mov T06, IN3
+	or T06, OUT0
+	mov T07, IN3
+	com T07
+	mov T08, IN1
+	and T08, T05
+	mov T09, T07
+	or T09, T02
+	mov T10, IN1
+	and T10, T06
+	mov T11, T05
+	and T11, T01
+	mov OUT3, T08
+	eor OUT3, T09
+	mov OUT1, T11
+	eor OUT1, T10
+	mov T14, IN2
+	and T14, OUT3
+	mov T15, OUT0
+	eor T15, OUT1
+	mov T16, T09
+	eor T16, T14
+	mov OUT2, T15
+	eor OUT2, T16
+	ret
+
+/* S3: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14 */
+
+/* depth = 8,3,5,5, Total gates=18 */
+sb3: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	eor T00, IN2
+	mov T01, IN0
+	or T01, IN3
+	mov T02, IN0
+	and T02, IN3
+	mov T03, T00
+	and T03, T01
+	mov T04, IN1
+	or T04, T02
+	mov T05, IN0
+	and T05, IN1
+	mov T06, IN3
+	eor T06, T03
+	mov T07, IN2
+	or T07, T05
+	mov T08, IN1
+	eor T08, T06
+	mov T09, IN3
+	and T09, T04
+	mov T10, T01
+	eor T10, T09
+	mov OUT3, T07
+	eor OUT3, T08
+	mov T12, IN3
+	or T12, OUT3
+	mov T13, IN0
+	or T13, T06
+	mov T14, IN1
+	and T14, T12
+	mov OUT2, T07
+	eor OUT2, T10
+	mov OUT0, T13
+	eor OUT0, T14
+	mov OUT1, T04
+	eor OUT1, T03
+	ret
+
+/* InvS3: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1 */
+
+/* depth = 3,6,4,4, Total gates=17 */
+sb3_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN2
+	or T00, IN3
+	mov T01, IN0
+	or T01, IN3
+	mov T02, IN2
+	eor T02, T01
+	mov T03, IN1
+	eor T03, T01
+	mov T04, IN0
+	eor T04, IN3
+	mov T05, T03
+	and T05, T02
+	mov T06, IN1
+	and T06, T00
+	mov OUT2, T04
+	eor OUT2, T05
+	mov T08, IN0
+	eor T08, T02
+	mov OUT0, T06
+	eor OUT0, T02
+	mov T10, OUT0
+	or T10, T04
+	mov T11, T08
+	and T11, T10
+	mov T12, IN0
+	and T12, OUT2
+	mov T13, T00
+	eor T13, T04
+	mov OUT1, IN1
+	eor OUT1, T11
+	mov T15, IN1
+	or T15, T12
+	mov OUT3, T13
+	eor OUT3, T15
+	ret
+
+/* S4: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13 */
+
+/* depth = 6,7,5,3, Total gates=19 */
+sb4: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	or T00, IN1
+	mov T01, IN1
+	or T01, IN2
+	mov T02, IN0
+	eor T02, T01
+	mov T03, IN1
+	eor T03, IN3
+	mov T04, IN3
+	or T04, T02
+	mov T05, IN3
+	and T05, T00
+	mov OUT3, T02
+	eor OUT3, T05
+	mov T07, OUT3
+	and T07, T03
+	mov T08, T03
+	and T08, T04
+	mov T09, IN2
+	eor T09, T05
+	mov T10, IN1
+	and T10, IN2
+	mov T11, T03
+	eor T11, T07
+	mov T12, T10
+	or T12, T02
+	mov T13, T09
+	eor T13, T08
+	mov T14, IN0
+	and T14, T04
+	mov T15, T10
+	or T15, T11
+	mov OUT2, T12
+	eor OUT2, T07
+	mov OUT1, T14
+	eor OUT1, T15
+	mov OUT0, T13
+	com OUT0
+	ret
+
+/* InvS4: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1 */
+
+/* depth = 6,4,7,3, Total gates=17 */
+sb4_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN1
+	or T00, IN3
+	mov T01, IN2
+	or T01, IN3
+	mov T02, IN0
+	and T02, T00
+	mov T03, IN1
+	eor T03, T01
+	mov T04, IN2
+	eor T04, IN3
+	mov T05, T02
+	com T05
+	mov T06, IN0
+	and T06, T03
+	mov OUT1, T04
+	eor OUT1, T06
+	mov T08, OUT1
+	or T08, T05
+	mov T09, IN0
+	eor T09, T06
+	mov T10, T00
+	eor T10, T08
+	mov T11, IN3
+	eor T11, T03
+	mov T12, IN2
+	or T12, T09
+	mov OUT3, T02
+	eor OUT3, T11
+	mov T14, IN0
+	eor T14, T03
+	mov OUT2, T10
+	eor OUT2, T12
+	mov OUT0, T14
+	eor OUT0, T08
+	ret
+
+/* S5: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1 */
+
+/* depth = 4,6,8,6, Total gates=17 */
+sb5: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN1
+	eor T00, IN3
+	mov T01, IN1
+	or T01, IN3
+	mov T02, IN0
+	and T02, T00
+	mov T03, IN2
+	eor T03, T01
+	mov T04, T02
+	eor T04, T03
+	mov OUT0, T04
+	com OUT0
+	mov T06, IN0
+	eor T06, T00
+	mov T07, IN3
+	or T07, OUT0
+	mov T08, IN1
+	or T08, T04
+	mov T09, IN3
+	eor T09, T07
+	mov T10, IN1
+	or T10, T06
+	mov T11, T02
+	or T11, OUT0
+	mov T12, T06
+	or T12, T09
+	mov T13, T00
+	eor T13, T10
+	mov OUT2, T08
+	eor OUT2, T12
+	mov OUT1, T06
+	eor OUT1, T07
+	mov OUT3, T11
+	eor OUT3, T13
+	ret
+
+/* InvS5: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0 */
+
+/* depth = 4,6,9,7, Total gates=17 */
+sb5_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	and T00, IN3
+	mov T01, IN2
+	eor T01, T00
+	mov T02, IN0
+	eor T02, IN3
+	mov T03, IN1
+	and T03, T01
+	mov T04, IN0
+	and T04, IN2
+	mov OUT0, T02
+	eor OUT0, T03
+	mov T06, IN0
+	and T06, OUT0
+	mov T07, T00
+	eor T07, OUT0
+	mov T08, IN1
+	or T08, T04
+	mov T09, IN1
+	com T09
+	mov OUT1, T07
+	eor OUT1, T08
+	mov T11, T09
+	or T11, T06
+	mov T12, OUT0
+	or T12, OUT1
+	mov OUT3, T01
+	eor OUT3, T11
+	mov T14, T01
+	eor T14, T12
+	mov T15, IN1
+	eor T15, IN3
+	mov OUT2, T15
+	eor OUT2, T14
+	ret
+
+/* S6: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0 */
+
+/* depth = 8,3,6,3, Total gates=19 */
+sb6: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	and T00, IN3
+	mov T01, IN1
+	eor T01, IN2
+	mov T02, IN0
+	eor T02, IN3
+	mov T03, T00
+	eor T03, T01
+	mov T04, IN1
+	or T04, IN2
+	mov OUT1, T03
+	com OUT1
+	mov T06, T02
+	and T06, T04
+	mov T07, IN1
+	and T07, OUT1
+	mov T08, IN0
+	or T08, IN2
+	mov T09, T06
+	eor T09, T07
+	mov T10, IN1
+	or T10, IN3
+	mov T11, IN2
+	eor T11, T10
+	mov T12, T08
+	eor T12, T09
+	mov OUT2, T12
+	com OUT2
+	mov T14, OUT1
+	and T14, T02
+	mov OUT3, T11
+	eor OUT3, T06
+	mov T16, IN0
+	eor T16, IN1
+	mov T17, OUT2
+	eor T17, T14
+	mov OUT0, T16
+	eor OUT0, T17
+	ret
+
+/* InvS6: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11 */
+
+/* depth = 5,3,8,6, Total gates=19 */
+sb6_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	eor T00, IN2
+	mov T01, IN2
+	com T01
+	mov T02, IN1
+	and T02, T00
+	mov T03, IN1
+	or T03, T01
+	mov T04, IN3
+	or T04, T02
+	mov T05, IN1
+	eor T05, IN3
+	mov T06, IN0
+	and T06, T03
+	mov T07, IN0
+	or T07, T01
+	mov T08, T06
+	eor T08, T04
+	mov OUT1, T05
+	eor OUT1, T07
+	mov OUT0, T08
+	com OUT0
+	mov T11, IN1
+	and T11, OUT0
+	mov T12, T00
+	and T12, T04
+	mov T13, T00
+	eor T13, T11
+	mov T14, T06
+	eor T14, T12
+	mov T15, IN3
+	or T15, T01
+	mov T16, IN0
+	eor T16, OUT1
+	mov OUT3, T16
+	eor OUT3, T14
+	mov OUT2, T15
+	eor OUT2, T13
+	ret
+
+/* S7: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6 */
+
+/* depth = 10,7,10,4, Total gates=19 */
+sb7: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	and T00, IN2
+	mov T01, IN3
+	com T01
+	mov T02, IN0
+	and T02, T01
+	mov T03, IN1
+	or T03, T00
+	mov T04, IN0
+	and T04, IN1
+	mov T05, IN2
+	eor T05, T03
+	mov OUT3, T02
+	eor OUT3, T05
+	mov T07, IN2
+	or T07, OUT3
+	mov T08, IN3
+	or T08, T04
+	mov T09, IN0
+	eor T09, T07
+	mov T10, T03
+	and T10, OUT3
+	mov OUT1, T08
+	eor OUT1, T09
+	mov T12, IN1
+	eor T12, OUT1
+	mov T13, T00
+	eor T13, OUT1
+	mov T14, IN2
+	eor T14, T04
+	mov T15, T10
+	or T15, T12
+	mov T16, T01
+	or T16, T13
+	mov OUT0, T14
+	eor OUT0, T16
+	mov OUT2, IN0
+	eor OUT2, T15
+	ret
+
+/* InvS7: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2 */
+
+/* depth = 9,7,3,3, Total gates=18 */
+sb7_inv: /* reads IN0..IN3, writes OUT0..OUT3, T registers are scratch */
+	mov T00, IN0
+	and T00, IN1
+	mov T01, IN0
+	or T01, IN1
+	mov T02, IN2
+	or T02, T00
+	mov T03, IN3
+	and T03, T01
+	mov OUT3, T02
+	eor OUT3, T03
+	mov T05, IN1
+	eor T05, T03
+	mov T06, IN3
+	eor T06, OUT3
+	mov T07, T06
+	com T07
+	mov T08, T05
+	or T08, T07
+	mov T09, IN1
+	eor T09, IN3
+	mov T10, IN0
+	or T10, IN3
+	mov OUT1, IN0
+	eor OUT1, T08
+	mov T12, IN2
+	eor T12, T05
+	mov T13, IN2
+	and T13, T10
+	mov T14, IN3
+	or T14, OUT1
+	mov T15, T00
+	or T15, T09
+	mov OUT0, T12
+	eor OUT0, T14
+	mov OUT2, T13
+	eor OUT2, T15
+	ret
+
+sf_tab: /* addresses of the eight forward S-box routines; sbox128 indexes this table with (box & 7) */
+.word sb0, sb1, sb2, sb3
+.word sb4, sb5, sb6, sb7
+
+sinvf_tab: /* addresses of the eight inverse S-box routines; used by inv_sbox128 */
+.word sb0_inv, sb1_inv, sb2_inv, sb3_inv
+.word sb4_inv, sb5_inv, sb6_inv, sb7_inv
+
+/* commented-out alternative encoding of the two tables (not assembled):
+.byte pm_lo8(sb0), pm_hi8(sb0)
+.byte pm_lo8(sb1), pm_hi8(sb1)
+.byte pm_lo8(sb2), pm_hi8(sb2)
+.byte pm_lo8(sb3), pm_hi8(sb3)
+.byte pm_lo8(sb4), pm_hi8(sb4)
+.byte pm_lo8(sb5), pm_hi8(sb5)
+.byte pm_lo8(sb6), pm_hi8(sb6)
+.byte pm_lo8(sb7), pm_hi8(sb7)
+
+
+sinvf_tab:
+.byte pm_lo8(sb0_inv), pm_hi8(sb0_inv)
+.byte pm_lo8(sb1_inv), pm_hi8(sb1_inv)
+.byte pm_lo8(sb2_inv), pm_hi8(sb2_inv)
+.byte pm_lo8(sb3_inv), pm_hi8(sb3_inv)
+.byte pm_lo8(sb4_inv), pm_hi8(sb4_inv)
+.byte pm_lo8(sb5_inv), pm_hi8(sb5_inv)
+.byte pm_lo8(sb6_inv), pm_hi8(sb6_inv)
+.byte pm_lo8(sb7_inv), pm_hi8(sb7_inv)
+*/
+/* C reference for the assembly routine sbox128 that follows. The assembly stores each result back into w directly instead of going through a temporary buffer and memcpy.
+void sbox128(void * w, uint8_t box){
+	uint8_t i, buffer[16];
+	box &= 0x7;
+
+	sb_fpt fp;
+	fp = (sb_fpt)pgm_read_word(&(sf_tab[box]));
+	for(i=0; i<4; ++i){
+		fp(buffer+i, (uint8_t*)w+i);
+	}
+	memcpy(w, buffer, 16);
+}
+*/
+.global sbox128
+sbox128: /* applies forward S-box number (r22 & 7) in place to the 16 bytes at r24:r25; r24:r25 = w and r22 = box, matching the C reference above */
+	ldi r30, lo8(sf_tab) /* Z = address of the forward routine table */
+	ldi r31, hi8(sf_tab)
+1: /* shared body: inv_sbox128 jumps here with Z = sinvf_tab */
+; clr r1
+	andi r22, 0x07 /* box &= 7 */
+	lsl r22 /* table entries are two bytes each */
+	add r30, r22
+	adc r31, r1 /* carry into the high byte; relies on r1 == 0 because the clr r1 above is commented out - TODO confirm callers guarantee this */
+	lpm r26, Z+ /* read the 16-bit table entry from program memory into r27:r26 */
+	lpm r27, Z
+	lsr r27 /* 16-bit shift right by one; presumably turns the byte address stored by .word into the word address icall expects - confirm */
+	ror r26
+	push r28 /* save Y, which holds the data pointer for the rest of the routine */
+	push r29
+	movw r30, r26 /* Z = routine address used by every icall below */
+	movw r28, r24 /* Y = w; r24/r25 are about to be reused as IN2/IN3 */
+	push_range 2, 17 /* macro from avr-asm-macros.S (not shown here); presumably saves r2..r17, which include T00..T14 */
+	ldd IN0, Y+0 /* pass 1 of 4: bytes at offsets 0, 4, 8, 12 */
+	ldd IN1, Y+4
+	ldd IN2, Y+8
+	ldd IN3, Y+12
+	icall /* run the selected S-box routine on these four bytes */
+	std Y+0, OUT0 /* write the four result bytes back to the same offsets */
+	std Y+4, OUT1
+	std Y+8, OUT2
+	std Y+12, OUT3
+	ldd IN0, Y+0+1 /* pass 2 of 4: offsets 1, 5, 9, 13 */
+	ldd IN1, Y+4+1
+	ldd IN2, Y+8+1
+	ldd IN3, Y+12+1
+	icall
+	std Y+0+1, OUT0
+	std Y+4+1, OUT1
+	std Y+8+1, OUT2
+	std Y+12+1, OUT3
+	ldd IN0, Y+0+2 /* pass 3 of 4: offsets 2, 6, 10, 14 */
+	ldd IN1, Y+4+2
+	ldd IN2, Y+8+2
+	ldd IN3, Y+12+2
+	icall
+	std Y+0+2, OUT0
+	std Y+4+2, OUT1
+	std Y+8+2, OUT2
+	std Y+12+2, OUT3
+	ldd IN0, Y+0+3 /* pass 4 of 4: offsets 3, 7, 11, 15 */
+	ldd IN1, Y+4+3
+	ldd IN2, Y+8+3
+	ldd IN3, Y+12+3
+	icall
+	std Y+0+3, OUT0
+	std Y+4+3, OUT1
+	std Y+8+3, OUT2
+	std Y+12+3, OUT3
+	pop_range 2, 17 /* counterpart of push_range above; presumably restores r2..r17 */
+	pop r29 /* restore Y */
+	pop r28
+	ret
+
+.global inv_sbox128
+inv_sbox128: /* same as sbox128, but selects the routine from sinvf_tab (inverse S-boxes) */
+	ldi r30, lo8(sinvf_tab) /* Z = address of the inverse routine table */
+	ldi r31, hi8(sinvf_tab)
+	rjmp 1b /* continue at local label 1 inside sbox128 */
+/* C reference for inv_sbox128 above:
+void inv_sbox128(void * w, uint8_t box){
+	uint8_t i, buffer[16];
+	box &= 0x7;
+
+	sb_fpt fp;
+	fp = (sb_fpt)pgm_read_word(&(sinvf_tab[box]));
+	for(i=0; i<4; ++i){
+		fp(buffer+i, (uint8_t*)w+i);
+	}
+	memcpy(w, buffer, 16);
+}
+*/
+
+
+
+
+
+