+++ /dev/null
-/* serpent_asm.S */
-/*
- This file is part of the AVR-Crypto-Lib.
- Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * File: serpent_sboxes.S
- * Author: Daniel Otte
- * Date: 2008-08-07
- * License: GPLv3 or later
- * Description: Implementation of the serpent sbox function.
- *
- */
-
-#include <avr/io.h>
-#include "avr-asm-macros.S"
-
-/*
-static void serpent_lt(uint8_t *b){
- X0 = rotl32(X0, 13);
- X2 = rotl32(X2, 3);
- X1 ^= X0 ^ X2;
- X3 ^= X2 ^ (X0 << 3);
- X1 = rotl32(X1, 1);
- X3 = rotl32(X3, 7);
- X0 ^= X1 ^ X3;
- X2 ^= X3 ^ (X1 << 7);
- X0 = rotl32(X0, 5);
- X2 = rotr32(X2, 10);
-}
-*/
-
-#if 0
-A0 = 4
-A1 = 5
-A2 = 6
-A3 = 7
-B0 = 8
-B1 = 9
-B2 = 10
-B3 = 11
-C0 = 12
-C1 = 13
-C2 = 14
-C3 = 15
-D0 = 16
-D1 = 17
-D2 = 18
-D3 = 19
-T0 = 20
-T1 = 21
-T2 = 22
-T3 = 23
-
-serpent_lt:
- push_range 4, 17
- movw r26, r24
- ld A2, X+
- ld A3, X+
- ld A0, X+
- ld A1, X+
- ldi r20, 3
- mov r0, A0
-1:
- lsr r0
- ror A3
- ror A2
- ror A1
- ror A0
- dec r20
- brne 1b
- ld B0, X+
- ld B1, X+
- ld B2, X+
- ld B3, X+
-
- ld C2, X+
- ld C3, X+
- ld C0, X+
- ld C1, X+
- ldi r20, 3
- mov r0, C0
-1:
- lsr r0
- ror C3
- ror C2
- ror C1
- ror C0
- dec r20
- brne 1b
-
- ld D0, X+
- ld D1, X+
- ld D2, X+
- ld D3, X+
- /* X1 ^= X0 ^ X2; */
- eor B0, A0
- eor B0, C0
- eor B1, A1
- eor B1, C1
- eor B2, A2
- eor B2, C2
- eor B3, A3
- eor B3, C3
- /* X3 ^= X2 ^ (X0 << 3); */
- mov T0, A0
- mov T1, A1
- mov T2, A2
- mov T3, A3
- ldi r24, 3
-1:
- lsl T0
- rol T1
- rol T2
- rol T3
- dec r24
- brne 1b
- eor C0, B0
- eor C0, T0
- eor C1, B1
- eor C1, T1
- eor C2, B2
- eor C2, T2
- eor C3, B3
- eor C3, T3
- /* X1 = rotl32(X1, 1); */
- mov r0, B3
- lsl r0
- rol B0
- rol B1
- rol B2
- rol B3
- /* X3 = rotl32(X3, 7); */
- mov r0, D3
- mov D3, D2
- mov D2, D1
- mov D1, D0
- mov D0, r0
- lsr r0
- ror D3
- ror D2
- ror D1
- ror D0
- /* X0 ^= X1 ^ X3; */
- eor A0, B0
- eor A0, D0
- eor A1, B1
- eor A1, D1
- eor A2, B2
- eor A2, D2
- eor A3, B3
- eor A3, D3
- /* X2 ^= X3 ^ (X1 << 7); */
- mov T1, B0
- mov T2, B1
- mov T3, B2
- clr T0
- mov r0, B3
- lsr r0
- ror T2
- ror T1
- ror T0
- eor C0, D0
- eor C0, T0
- eor C1, D1
- eor C1, T1
- eor C2, D2
- eor C2, T2
- eor C3, D3
- eor C3, T3
- /* X0 = rotl32(X0, 5); */
- ldi r24, 5
- mov r0, A3
-1:
- lsl r0
- rol A0
- rol A1
- rol A2
- rol A3
- dec r24
- brne 1b
- /* X2 = rotr32(X2, 10); */
- mov r0, C0
- mov C0, C1
- mov C1, C2
- mov C2, C3
- mov C3, r0
- ldi r24, 2
-1:
- lsr r0
- ror C2
- ror C1
- ror C0
- ror C3
- dec r24
- brne 1b
-
- clr r31
- ldi r30, D3+1
- ldi r24, 16
-1:
- ld r0, -Z
- st -X, r0
- dec r24
- brne 1b
-
- pop_range 4, 17
- ret
-#endif
-
-T0 = 22
-T1 = 23
-T2 = 24
-T3 = 25
-TT = 21
-/* rotate the data word (4 byte) pointed to by X by r20 bits to the right */
-memrotr32:
- ld T0, X+
- ld T1, X+
- ld T2, X+
- ld T3, X+
- mov TT, T0
-1:
- lsr TT
- ror T3
- ror T2
- ror T1
- ror T0
- dec r20
- brne 1b
- st -X, T3
- st -X, T2
- st -X, T1
- st -X, T0
- ret
-
-/* rotate the data word (4 byte) pointed to by X by r20 bits to the left */
-memrotl32:
- ld T0, X+
- ld T1, X+
- ld T2, X+
- ld T3, X+
- mov TT, T3
-1:
- lsl TT
- rol T0
- rol T1
- rol T2
- rol T3
- dec r20
- brne 1b
- st -X, T3
- st -X, T2
- st -X, T1
- st -X, T0
- ret
-
-/* xor the dataword (4 byte) pointed by Z into X */
-memeor32:
- ldi T2, 4
-1:
- ld T0, X
- ld T1, Z+
- eor T0, T1
- st X+, T0
- dec T2
- brne 1b
- ret
-
-serpent_lt:
- /* X0 := X0 <<< 13 */
- movw r26, r24
- ldi r20, 7
- rcall memrotl32
- ldi r20, 6
- rcall memrotl32
- /* X2 := X2 <<< 3 */
- adiw r26, 8
- ldi r20, 3
- rcall memrotl32
- /* X1 ^= X2 */
- movw r30, r26
- sbiw r26, 4
- rcall memeor32
- /* X1 ^= X0 */
- sbiw r26, 4
- sbiw r30, 12
- rcall memeor32
- /* X3 ^= X2 */
- movw r30, r26
- adiw r26, 4
- rcall memeor32
- /* T := X0 */
- sbiw r26, 16
- ld r18, X+
- ld r19, X+
- ld r20, X+
- ld r21, X+
- /* T := T<<3 */
- ldi r22, 3
-1:
- lsl r18
- rol r19
- rol r20
- rol r21
- dec r22
- brne 1b
- clr r31
- /* X3 ^= T */
- adiw r26, 8
- ldi r30, 18
- rcall memeor32
- /* X1 := X1<<<1 */
- sbiw r26, 12
- ldi r20, 1
- rcall memrotl32
- /* X3 := X3<<<7 */
- adiw r26, 8
- ldi r20, 7
- rcall memrotl32
- /* X0 ^= X3 */
- movw r30, r26
- sbiw r26, 12
- rcall memeor32
- /* X0 ^= X1 */
- movw r30, r26
- sbiw r26, 4
- rcall memeor32
- /* X2 ^= X3 */
- adiw r26, 4
- adiw r30, 4
- rcall memeor32
- /* T := X1<<<8 */
- sbiw r26, 8
- ld r19, X+
- ld r20, X+
- ld r21, X+
- ld r18, X+
- /* T := T>>>1; T&=0xfffffff8 */
- lsr r18
- ror r21
- ror r20
- ror r19
- clr r18
- ror r18
- clr r31
- ldi r30, 18
- /* X2 ^= T */
- rcall memeor32
- /* X0 := X0 <<< 5 */
- sbiw r26, 12
- ldi r20, 5
- rcall memrotl32
- /* X3 := X3 >>> 10 */
- adiw r26, 8
- ldi r20, 7
- rcall memrotr32
- ldi r20, 3
- rcall memrotr32
- ret
-
-serpent_inv_lt:
- /* X0 := X0 >>> 5 */
- movw r26, r24
- ldi r20, 5
- rcall memrotr32
- /* X2 := X2 <<< 10 */
- adiw r26, 8
- ldi r20, 7
- rcall memrotl32
- ldi r20, 3
- rcall memrotl32
- /* X2 ^= X3 */
- movw r30, r26
- adiw r30, 4
- rcall memeor32
- sbiw r26, 4
- sbiw r30, 12
- /* T := X1<<7 */
- ld r19, Z+
- ld r20, Z+
- ld r21, Z+
- ld r18, Z+
- lsr r18
- ror r21
- ror r20
- ror r19
- clr r18
- ror r18
- clr r31
- /* X2 ^= T */
- ldi r30, 18
- rcall memeor32
- /* X0 ^= X1 */
- sbiw r26, 12
- movw r30, r26
- adiw r30, 4
- rcall memeor32
- /* X0 ^= X3 */
- sbiw r26, 4
- adiw r30, 4
- rcall memeor32
- /* X1 := X1>>>1 */
- ldi r20, 1
- rcall memrotr32
- /* X3 := X3>>>7 */
- adiw r26, 8
- ldi r20, 7
- rcall memrotr32
- /* X3 ^= X2 */
- sbiw r30, 8
- rcall memeor32
- sbiw r26, 4
- /* T:= X0<<3 */
- sbiw r30, 12
- ld r18, Z+
- ld r19, Z+
- ld r20, Z+
- ld r21, Z+
- ldi r24, 3
-1:
- lsl r18
- rol r19
- rol r20
- rol r21
- dec r24
- brne 1b
- /* X3 ^= T */
- clr r31
- ldi r30, 18
- rcall memeor32
- /* X1 ^= X0 */
- sbiw r26, 12
- movw r30, r26
- sbiw r30, 4
- rcall memeor32
- /* X1 ^= X2 */
- movw r26, r30
- adiw r30, 4
- rcall memeor32
- /* X2 := X2 >>> 3 */
- ldi r20, 3
- rcall memrotr32
- /* X0 := X0 >>> 13 */
- sbiw r26, 8
- ldi r20, 7
- rcall memrotr32
- ldi r20, 6
- rcall memrotr32
- ret
-
-/*
-#define GOLDEN_RATIO 0x9e3779b9l
-
-static uint32_t serpent_gen_w(uint32_t * b, uint8_t i){
- uint32_t ret;
- ret = b[0] ^ b[3] ^ b[5] ^ b[7] ^ GOLDEN_RATIO ^ (uint32_t)i;
- ret = rotl32(ret, 11);
- return ret;
-}
-*/
-/*
- * param b is passed in r24:r25
- * param i is passed in r22
- * return value is returned in r22.r23.r24.r25
- */
- /* trashes:
- * r20-r25, r30-r31
- */
-serpent_gen_w:
- movw r30, r24
- /* ^i^b[0]*/
- ld r21, Z+
- eor r22, r21
- ld r23, Z+
- ld r24, Z+
- ld r25, Z+
- /* ^b[3]^b[5]^[b7] */
- adiw r30, 4
- ldi r20, 3
-1:
- adiw r30, 4
- ld r21, Z+
- eor r22, r21
- ld r21, Z+
- eor r23, r21
- ld r21, Z+
- eor r24, r21
- ld r21, Z+
- eor r25, r21
- dec r20
- brne 1b
- /* ^0x9e3779b9l */
- ldi r21, 0xb9
- eor r22, r21
- ldi r21, 0x79
- eor r23, r21
- ldi r21, 0x37
- eor r24, r21
- ldi r21, 0x9e
- eor r25, r21
- /* <<<11 */
- mov r21, r25
- mov r25, r24
- mov r24, r23
- mov r23, r22
- mov r22, r21
- mov r21, r25
- ldi r20, 3
-1:
- lsl r21
- rol r22
- rol r23
- rol r24
- rol r25
- dec r20
- brne 1b
- ret
-
-/*
- * void serpent_init(const void* key, uint16_t keysize_b, serpent_ctx_t* ctx)
- */
-/*
- * param key is passed in r24:r25
- * param keysize is passed in r22:r23
- * param ctx is passed in r20:r21
- */
-.global serpent_init
-serpent_init:
- stack_alloc 32
- adiw r30, 1
- push_ r30, r31
- movw r26, r22
- adiw r26, 7
- tst r27
- breq 1f
- ldi r26, 32
- rjmp 2f
-1:
- lsr r26
- lsr r26
- lsr r26
-2:
- mov r22, r26
- bst r22, 5 /* store in T if we have to do the "append 1 thing"*/
- ldi r27, 32
-3: /* set buffer to zero */
- st Z+, r1
- dec r27
- brne 3b
-
- movw r26, r24 /* X points to the key */
- sbiw r30, 32
- tst r22
- breq 5f /* if keylength_b==0 */
-4: /* copy keybytes to buffer */
- ld r19, X+
- st Z+, r19
- dec r22
- brne 4b
-5:
- brts 7f /* if keylength_b == 256 */
- ldi r18, 0x01
- andi r22, 0x07
- brne 6f
- st Z, r18
- rjmp 7f
-6: /* shift the one to the right position */
- lsl r18
- dec r22
- brne 6b
- or r18, r19
- st -Z, r18
-7: /* post "appending 1 thing" buffer is ready for subkey generation */
- movw r26, r20 /* X points to the context */
-
- pop_ r19, r18 /* r18:r19 points to the buffer */
- push r16
- clr r16
-8:
- movw r24, r18
- mov r22, r16
- rcall serpent_gen_w
- movw r30, r18
- ldi r20, 7*4
-1: /* the memmove */
- ldd r0, Z+4
- st Z+, r0
- dec r20
- brne 1b
- /* store new word in buffer and context */
- st Z+, r22
- st Z+, r23
- st Z+, r24
- st Z+, r25
- st X+, r22
- st X+, r23
- st X+, r24
- st X+, r25
-
- inc r16
- cpi r16, 132
- brne 8b
-
- push_ r28, r29
- movw r28, r26
- subi r28, lo8(132*4)
- sbci r29, hi8(132*4)
- ldi r16, 33
-2:
- movw r24, r28
- adiw r28, 16
- ldi r22, 2
- add r22, r16
- rcall sbox128
- dec r16
- brne 2b
- pop_ r29, r28, r16
- stack_free 32
- ret
-
-/*
- * void serpent_enc(void* buffer, const serpent_ctx_t* ctx){
- */
-/*
- * param buffer is passed in r24:r25
- * param ctx is passed in r22:r23
- */
-.global serpent_enc
-serpent_enc:
-
- push_ r12, r13, r14, r15, r16
- clr r16
- movw r14, r24
- movw r12, r22
-1:
- movw r24, r14
- movw r22, r12
- ldi r20, 16
- add r12, r20
- adc r13, r1
- clr r21
- rcall memxor
- movw r24, r14
- mov r22, r16
- rcall sbox128
- movw r24, r14
- rcall serpent_lt
-
- inc r16
- cpi r16, 31
- brne 1b
-
- movw r24, r14
- movw r22, r12
- ldi r20, 16
- add r12, r20
- adc r13, r1
- clr r21
- rcall memxor
- movw r24, r14
- mov r22, r16
- rcall sbox128
-
- inc r16
- movw r24, r14
- movw r22, r12
- ldi r20, 16
- clr r21
- pop_ r16, r15, r14, r13, r12
- rjmp memxor
-
-/*
- * void serpent_dec(void* buffer, const serpent_ctx_t* ctx){
- */
-/*
- * param buffer is passed in r24:r25
- * param ctx is passed in r22:r23
- */
-.global serpent_dec
-serpent_dec:
- push_ r12, r13, r14, r15, r16
- movw r14, r24
-// ldi r16, lo8(32*16)
-// add r22, r16
- ldi r16, hi8(32*16)
- add r23, r16
- movw r12, r22
- ldi r20, 16
- clr r21
- rcall memxor
-
- movw r24, r14
- ldi r22, 31
- call inv_sbox128
-
- movw r24, r14
- ldi r20, 16
- sub r12, r20
- sbc r13, r1
- movw r22, r12
- clr r21
- rcall memxor
- ldi r16, 31
-1:
- dec r16
- movw r24, r14
- rcall serpent_inv_lt
- movw r24, r14
- mov r22, r16
- rcall inv_sbox128
- movw r24, r14
- ldi r20, 16
- sub r12, r20
- sbc r13, r1
- movw r22, r12
- clr r21
- rcall memxor
-
- tst r16
- brne 1b
- pop_ r16, r15, r14, r13, r12
- ret
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-