From 3a80fbe29e33b818ccebbaba7f8bbe48c5ccd173 Mon Sep 17 00:00:00 2001 From: bg Date: Tue, 18 Sep 2012 20:29:48 +0200 Subject: [PATCH] forgotten bmw stuff --- bmw/autogen_f0_asm.rb | 449 + bmw/autogen_f0_asmmix.rb | 487 + bmw/autogen_f1_neon_small.rb | 213 + bmw/bmw_small-asm-cstub.c | 544 + bmw/bmw_small-asm-cstub.s | 1275 +++ bmw/bmw_small-asm.S | 220 + bmw/bmw_small_speed.s | 1994 ++++ bmw/bmw_small_speed_asm_f0.S | 118 + bmw/bmw_small_speed_asmmix_f0.S | 71 + bmw/bmw_small_speed_cstub.c | 339 + bmw/bmw_small_speed_neon.s | 17268 ++++++++++++++++++++++++++++++ bmw/f0_small_autogen.i | 207 + bmw/f0_small_autogen_mix.i | 227 + bmw/f1_autogen_neon_small.i | 330 + bmw/gen_f0_arm.rb | 59 + 15 files changed, 23801 insertions(+) create mode 100644 bmw/autogen_f0_asm.rb create mode 100644 bmw/autogen_f0_asmmix.rb create mode 100644 bmw/autogen_f1_neon_small.rb create mode 100644 bmw/bmw_small-asm-cstub.c create mode 100644 bmw/bmw_small-asm-cstub.s create mode 100644 bmw/bmw_small-asm.S create mode 100644 bmw/bmw_small_speed.s create mode 100644 bmw/bmw_small_speed_asm_f0.S create mode 100644 bmw/bmw_small_speed_asmmix_f0.S create mode 100644 bmw/bmw_small_speed_cstub.c create mode 100644 bmw/bmw_small_speed_neon.s create mode 100644 bmw/f0_small_autogen.i create mode 100644 bmw/f0_small_autogen_mix.i create mode 100644 bmw/f1_autogen_neon_small.i create mode 100644 bmw/gen_f0_arm.rb diff --git a/bmw/autogen_f0_asm.rb b/bmw/autogen_f0_asm.rb new file mode 100644 index 0000000..6ff7c22 --- /dev/null +++ b/bmw/autogen_f0_asm.rb @@ -0,0 +1,449 @@ +# autogen_f0_asm.rb +=begin + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +=end +=begin + tmp = +t[ 5] +t[10] +t[13] +(tr1=-t[ 7]+t[14]) ; + q[ 0] = S32_0(tmp) + h[ 1]; + tmp = +t[ 8] +t[13] +t[ 0] -(tr2=+t[ 1]+t[10]) ; + q[ 3] = S32_3(tmp) + h[ 4]; + tmp = -t[11] +t[13] -t[ 0] -t[ 3] +t[ 4] ; + q[ 6] = S32_1(tmp) + h[ 7]; + tmp = +t[ 0] +(tr0=-t[ 3]+t[ 6]) +(tr1) ; + q[ 9] = S32_4(tmp) + h[10]; + tmp = -t[ 9] -(tr0) +(tr2) ; + q[12] = S32_2(tmp) + h[13]; + tmp = -t[ 4] +(tr0=-t[ 9]+t[12]) +(tr1=-t[ 6]+t[13]) ; + q[15] = S32_0(tmp) + h[ 0]; + tmp = +t[ 7] +t[15] +t[ 0] -(tr0) ; + q[ 2] = S32_2(tmp) + h[ 3]; + tmp = +t[10] +(tr0=-t[ 2]+t[15]) +(tr2=+t[ 3]-t[12]) ; + q[ 5] = S32_0(tmp) + h[ 6]; + tmp = -t[ 5] -(tr0) +(tr1) ; + q[ 8] = S32_3(tmp) + h[ 9]; + tmp = -t[ 0] -t[ 2] +t[ 9] +(tr0=-t[ 5]+t[ 8]) ; + q[11] = S32_1(tmp) + h[12]; + tmp = -t[11] +(tr0) +(tr2) ; + q[14] = S32_4(tmp) + h[15]; + tmp = +t[ 6] +(tr0=+t[11]+t[14]) -(tr1=+t[ 8]+t[15]) ; + q[ 1] = S32_1(tmp) + h[ 2]; + tmp = +t[ 9] +t[ 1] +t[ 2] -(tr0) ; + q[ 4] = S32_4(tmp) + h[ 5]; + tmp = -t[12] -t[14] +t[ 1] -t[ 4] -t[ 5] ; + q[ 7] = S32_2(tmp) + h[ 8]; + tmp = -t[ 1] -(tr0=+t[ 4]+t[ 7]) +(tr1) ; + q[10] = S32_0(tmp) + h[11]; + tmp = +t[ 2] +t[10] +t[11] +(tr0) ; + q[13] = S32_3(tmp) + h[14]; +=end +$c_code = <. 
+=end +=begin + tmp = +t[ 5] +t[10] +t[13] +(tr1=-t[ 7]+t[14]) ; + q[ 0] = S32_0(tmp) + h[ 1]; + tmp = +t[ 8] +t[13] +t[ 0] -(tr2=+t[ 1]+t[10]) ; + q[ 3] = S32_3(tmp) + h[ 4]; + tmp = -t[11] +t[13] -t[ 0] -t[ 3] +t[ 4] ; + q[ 6] = S32_1(tmp) + h[ 7]; + tmp = +t[ 0] +(tr0=-t[ 3]+t[ 6]) +(tr1) ; + q[ 9] = S32_4(tmp) + h[10]; + tmp = -t[ 9] -(tr0) +(tr2) ; + q[12] = S32_2(tmp) + h[13]; + tmp = -t[ 4] +(tr0=-t[ 9]+t[12]) +(tr1=-t[ 6]+t[13]) ; + q[15] = S32_0(tmp) + h[ 0]; + tmp = +t[ 7] +t[15] +t[ 0] -(tr0) ; + q[ 2] = S32_2(tmp) + h[ 3]; + tmp = +t[10] +(tr0=-t[ 2]+t[15]) +(tr2=+t[ 3]-t[12]) ; + q[ 5] = S32_0(tmp) + h[ 6]; + tmp = -t[ 5] -(tr0) +(tr1) ; + q[ 8] = S32_3(tmp) + h[ 9]; + tmp = -t[ 0] -t[ 2] +t[ 9] +(tr0=-t[ 5]+t[ 8]) ; + q[11] = S32_1(tmp) + h[12]; + tmp = -t[11] +(tr0) +(tr2) ; + q[14] = S32_4(tmp) + h[15]; + tmp = +t[ 6] +(tr0=+t[11]+t[14]) -(tr1=+t[ 8]+t[15]) ; + q[ 1] = S32_1(tmp) + h[ 2]; + tmp = +t[ 9] +t[ 1] +t[ 2] -(tr0) ; + q[ 4] = S32_4(tmp) + h[ 5]; + tmp = -t[12] -t[14] +t[ 1] -t[ 4] -t[ 5] ; + q[ 7] = S32_2(tmp) + h[ 8]; + tmp = -t[ 1] -(tr0=+t[ 4]+t[ 7]) +(tr1) ; + q[10] = S32_0(tmp) + h[11]; + tmp = +t[ 2] +t[10] +t[11] +(tr0) ; + q[13] = S32_3(tmp) + h[14]; +=end +$c_code = < y[0].chopf.to_i} +# push_regs = push_list.collect {|x| x[0]} +# push_list.reverse.each {|x| stack << x[1]} +# asm_out += sprintf(" stmdb SP, {%s}\n", push_regs.join(', ')) +# end +# puts asm_out +# puts "DBG(0): "+regmap.inspect +# puts "DBG(1): "+to_load.inspect +# puts "DBG(2): "+to_load2.inspect + + #puts 'DBG('+__LINE__.to_s+'): regmap = '+regmap.inspect + return regmap, stack, asm_out +end + +def gen_simple_assembler(operations) + asm_out='' + stack = Array.new + accu = $registers.length-1 +# outr = $registers.length-4 +# tr0 = $registers.length-3 +# tr1 = $registers.length-2 +# tr2 = $registers.length-4 + + reg_cnt = $registers.length-1 + regmap = Array.new(reg_cnt) + reg_idx=0 + step = 0 + asm_out += sprintf(" sub SP, #%d*4\n", 16) + operations.each do |op| + asm_out += sprintf("/*=== W[%2d] ===*/\n", op.index) + regmap, stack, tstr = load_registers(regmap, stack, op, step-1) + asm_out += tstr + step += 1 + reg_hash = Hash.new + op.read_t.each do |t| + if regmap.index(t.chopf.to_i)==nil + printf("ERROR: too few registers!\n") + end + reg_hash[t.chopf.to_i]=regmap.index(t.chopf.to_i) + end + if op.write_tr0.length==2 + signs_code=op.write_tr0[0][0..0]+op.write_tr0[1][0..0] + case signs_code + when "++" + asm_out += sprintf(" add %s, %s, %s\n", $registers[regmap.index('tr0')], \ + $registers[reg_hash[op.write_tr0[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr0[1].chopf.to_i]]) + when "+-" + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr0')], \ + $registers[reg_hash[op.write_tr0[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr0[1].chopf.to_i]]) + when "-+" + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr0')], \ + $registers[reg_hash[op.write_tr0[1].chopf.to_i]], \ + $registers[reg_hash[op.write_tr0[0].chopf.to_i]]) + else + printf("ERROR: invalid signs_code (%d): %s\n", __LINE__, signs_code) + puts op.inspect + end + end + if op.write_tr1.length==2 + signs_code=op.write_tr1[0][0..0]+op.write_tr1[1][0..0] + case signs_code + when "++" + asm_out += sprintf(" add %s, %s, %s\n", $registers[regmap.index('tr1')], \ + $registers[reg_hash[op.write_tr1[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr1[1].chopf.to_i]]) + when "+-" + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr1')], \ + 
$registers[reg_hash[op.write_tr1[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr1[1].chopf.to_i]]) + when "-+" + # puts 'DBG: '+reg_hash.inspect + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr1')], \ + $registers[reg_hash[op.write_tr1[1].chopf.to_i]], \ + $registers[reg_hash[op.write_tr1[0].chopf.to_i]]) + else + printf("ERROR: invalid signs_code (%d): %s\n", __LINE__, signs_code) + puts op.inspect + end + end + if op.write_tr2.length==2 + signs_code=op.write_tr2[0][0..0]+op.write_tr2[1][0..0] + case signs_code + when "++" + asm_out += sprintf(" add %s, %s, %s\n", $registers[regmap.index('tr2')], \ + $registers[reg_hash[op.write_tr2[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr2[1].chopf.to_i]]) + when "+-" + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr2')], \ + $registers[reg_hash[op.write_tr2[0].chopf.to_i]], \ + $registers[reg_hash[op.write_tr2[1].chopf.to_i]]) + when "-+" + asm_out += sprintf(" sub %s, %s, %s\n", $registers[regmap.index('tr2')], \ + $registers[reg_hash[op.write_tr2[1].chopf.to_i]], \ + $registers[reg_hash[op.write_tr2[0].chopf.to_i]]) + else + printf("ERROR: invalid signs_code (%d): %s\n", __LINE__, signs_code) + puts op.inspect + end + end + reg_hash['0tr'] = regmap.index('tr0') + reg_hash['1tr'] = regmap.index('tr1') + reg_hash['2tr'] = regmap.index('tr2') + tr_to_delete = op.read_tr.collect { |x| x.chopf} + tr_to_delete.delete('0') if op.write_tr0.length!=0 + tr_to_delete.delete('1') if op.write_tr1.length!=0 + tr_to_delete.delete('2') if op.write_tr2.length!=0 + tr_to_delete.each do |x| + y = regmap.index('tr'+x) + regmap[y]=nil if y + # puts 'DBG('+__LINE__.to_s+') deleted tr'+x+' @ '+y.to_s + end + operations_to_do = op.read_t + op.read_tr.each {|x| operations_to_do << x+'tr'} + op.write_tr0.each {|x| operations_to_do.delete(x)} + op.write_tr1.each {|x| operations_to_do.delete(x)} + op.write_tr2.each {|x| operations_to_do.delete(x)} + operations_to_do = operations_to_do.sort + asm_out += sprintf("/*(-- should do %s --)*/\n", operations_to_do.join(', ')); + sign_code=operations_to_do[1][0..0] + case sign_code + when '+' + # puts 'DBG('+__LINE__.to_s+'): x='+operations_to_do[0]+' reg_hash='+reg_hash.inspect + asm_out += sprintf(" add %s, %s, %s\n", $registers[accu], \ + $registers[reg_hash[operations_to_do[0].chopf.xtr]], \ + $registers[reg_hash[operations_to_do[1].chopf.xtr]]) + when '-' + # puts 'DBG('+__LINE__.to_s+'): x='+x+' reg_hash='+reg_hash.inspect + asm_out += sprintf(" sub %s, %s, %s\n", $registers[accu], \ + $registers[reg_hash[operations_to_do[0].chopf.xtr]], \ + $registers[reg_hash[operations_to_do[1].chopf.xtr]]) + end + operations_to_do = operations_to_do[2..-1] + operations_to_do.each do |x| + sign_code=x[0..0] + case sign_code + when '+' + # puts 'DBG('+__LINE__.to_s+'): x='+x+' reg_hash='+reg_hash.inspect + asm_out += sprintf(" add %s, %s\n", $registers[accu], \ + $registers[reg_hash[x.chopf.xtr]]) + when '-' + asm_out += sprintf(" sub %s, %s\n", $registers[accu], \ + $registers[reg_hash[x.chopf.xtr]]) + end + end + outr = find_register_to_free(regmap, $regusemap, step) + regmap[outr]=nil + if(op.s==4) + asm_out += sprintf(" S32_4 %s\n", $registers[accu]) + asm_out += sprintf(" ldr %s, [r1, #%d*4]\n", $registers[outr], op.h) + asm_out += sprintf(" add %s, %s\n", $registers[accu], $registers[outr]) + asm_out += sprintf(" str %s, [r0, #%d*4]\n", $registers[accu], op.index) + else + asm_out += sprintf(" S32_%d %s %s\n", op.s, $registers[outr], $registers[accu]) + asm_out += sprintf(" ldr %s, 
[r1, #%d*4]\n", $registers[accu], op.h) + asm_out += sprintf(" add %s, %s\n", $registers[outr], $registers[accu]) + asm_out += sprintf(" str %s, [r0, #%d*4]\n", $registers[outr], op.index) + end + +# asm_out += sprintf(" str %s, [r0, #%d*4]\n", $registers[accu], op.index) + end + asm_out += sprintf(" add SP, #%d*4", 16) + return asm_out +end + +class RegMapEntry + attr_accessor :usemap + attr_accessor :nextusemap +end +=begin +class RegMap + atrr_reader :steps + atrr_reader :entrys + attr_reader :regcnt +end + +def gen_regmap_simple + +end +=end +$regusemap = Array.new + +def build_regusemap(operations) + i=0 + operations.each do |op| + op.read_t.each do |t| + x = t.chopf.to_i + if $regusemap[x]==nil + $regusemap[x]=Array.new + end + $regusemap[x]<. +=end + + +header = <. +*/ + +static inline +void bmw_small_f1(uint32_t* q, const uint32_t* m, const uint32_t* h){ + uint32_t even, odd; + uint32x4_t qq16, qq20, qq24, qq28; + uint32x4_t qm0, qm1, qm2; + uint32x4_t qk={0x55555550UL, 0x5aaaaaa5UL, 0x5ffffffaUL, 0x6555554fUL}; + uint32x4_t qkadd={0x15555554UL, 0x15555554UL, 0x15555554UL, 0x15555554UL}; + uint32x2_t dtmp0; + uint32x4x2_t q2tmp0, q2tmp1; +EOF + +footer = <>1)|q[%2d]);\n", i+16, (i%2==0)?"even":"odd ", i+14, i+14) + s += sprintf(" qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}),\n" \ + " vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16}));\n") + s += sprintf(" qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}),\n" \ + " vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2}));\n") + s += sprintf(" qm1 = vaddq_u32(qm1, qm0);\n") + s += sprintf(" dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1));\n") + s += sprintf(" q[%2d] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1);\n", i+16) + + return s +end + + +puts header +[16,20,24,28].each {|x| puts gen_addElement(x)} +(0..1).each {|x| puts gen_expand_1(x)} +(2..15).each {|x| puts gen_expand_2(x, 2)} +puts footer \ No newline at end of file diff --git a/bmw/bmw_small-asm-cstub.c b/bmw/bmw_small-asm-cstub.c new file mode 100644 index 0000000..04bd59d --- /dev/null +++ b/bmw/bmw_small-asm-cstub.c @@ -0,0 +1,544 @@ +/* bmw_small.c */ +/* + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ +/* + * \file bmw_small.c + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + * + */ + +#include +#include +#include "bmw_small.h" +#include "memxor.h" + +#define SHL32(a,n) ((a)<<(n)) +#define SHR32(a,n) ((a)>>(n)) +#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n)))) +#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n)))) + + +#define TWEAK 1 +#if TWEAK +# define BUG24 0 +#else +# define BUG24 1 +#endif + +#define F0_HACK 0 + +#define DEBUG 0 + +#ifndef F0_HACK +# define F0_HACK 0 +#endif + +#if DEBUG + #include "cli.h" + + void ctx_dump(const bmw_small_ctx_t* ctx){ + uint8_t i; + cli_putstr("\r\n==== ctx dump ===="); + for(i=0; i<16;++i){ + cli_putstr("\r\n h["); + cli_hexdump(&i, 1); + cli_putstr("] = "); + cli_hexdump_rev(&(ctx->h[i]), 4); + } + cli_putstr("\r\n counter = "); + cli_hexdump(&(ctx->counter), 4); + } + + void dump_x(const uint32_t* q, uint8_t elements, char x){ + uint8_t i; + cli_putstr("\r\n==== "); + cli_putc(x); + cli_putstr(" dump ===="); + for(i=0; i. +*/ +/* + * \file bmw_small-asm.S + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2010-05-23 + * \license GPLv3 or later + * + */ +.syntax unified +.text +.thumb +.align 2 +.thumb_func + /* +#define S32_1(x) ( (SHR32((x), 1)) ^ \ + (SHL32((x), 2)) ^ \ + (ROTL32((x), 8)) ^ \ + (ROTR32((x), 9)) ) + +#define S32_2(x) ( (SHR32((x), 2)) ^ \ + (SHL32((x), 1)) ^ \ + (ROTL32((x), 12)) ^ \ + (ROTR32((x), 7)) ) + +#define S32_3(x) ( (SHR32((x), 2)) ^ \ + (SHL32((x), 2)) ^ \ + (ROTL32((x), 15)) ^ \ + (ROTR32((x), 3)) ) + +#define S32_4(x) ( (SHR32((x), 1)) ^ (x)) + +#define S32_5(x) ( (SHR32((x), 2)) ^ (x)) + +*/ + +.global bmw_s32_0 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_0, %function +bmw_s32_0: + mov r1, r0 + lsrs r0, r0, #1 + eor r0, r0, r1, lsl #3 + eor r0, r0, r1, ror #28 + eor r0, r0, r1, ror #13 + bx lr + +.global bmw_s32_1 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_1, %function +bmw_s32_1: + mov r1, r0 + lsrs r0, r0, #1 + eor r0, r0, r1, lsl #2 + eor r0, r0, r1, ror #24 + eor r0, r0, r1, ror #9 + bx lr + +.global bmw_s32_2 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_2, %function +bmw_s32_2: + mov r1, r0 + lsrs r0, r0, #2 + eor r0, r0, r1, lsl #1 + eor r0, r0, r1, ror #20 + eor r0, r0, r1, ror #7 + bx lr + +.global bmw_s32_3 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_3, %function +bmw_s32_3: + mov r1, r0 + lsrs r0, r0, #2 + eor r0, r0, r1, lsl #2 + eor r0, r0, r1, ror #17 + eor r0, r0, r1, ror #3 + bx lr + +.global bmw_s32_4 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_4, %function +bmw_s32_4: + eor r0, r0, r0, lsr #1 + bx lr + +.global bmw_s32_5 +.text +.thumb +.align 2 +.thumb_func +.type bmw_s32_5, %function +bmw_s32_5: + eor r0, r0, r0, lsr #2 + bx lr + + + +.global bmw_small_f0 +.text +.thumb +.align 2 +.thumb_func +.type bmw_small_f0, %function +/* + * param q: r0 + * param h: r1 + * param m: r2 + */ +bmw_small_f0: + push {r4-r11, r14} + sub sp, sp, #64 + mov r3, sp + adds r3, r3, #4 +10: + ldmia r1!, {r4,r6,r8,r10} + ldmia r2!, {r5,r7,r9,r11} + eors r4, r5 + eors r6, r7 + eors r8, r9 + eors r10, r11 + stmia r3!, {r4,r6,r8,r10} + + ldmia r1!, {r4,r6,r8,r10} + ldmia r2!, {r5,r7,r9,r11} + eors r4, r5 + eors r6, r7 + eors r8, r9 + eors r10, r11 + stmia r3!, {r4,r6,r8,r10} + + ldmia r1!, {r4,r6,r8,r10} + ldmia r2!, {r5,r7,r9,r11} + eors r4, r5 + eors r6, r7 + eors r8, r9 + eors r10, r11 + stmia r3!, {r4,r6,r8,r10} + + ldmia r1!, {r4,r6,r8,r10} + ldmia r2!, {r5,r7,r9,r11} + eors r4, r5 + eors r6, r7 + eors r8, 
r9 + eors r10, r11 + stmia r3!, {r4,r6,r8,r10} +/* --- */ + subs r1, r1, #64 + subs r3, r3, #64 +/* + q[ 0] = (+ h[ 5] - h[ 7] + h[10] + h[13] + h[14]); + q[ 3] = (+ h[ 8] - h[10] + h[13] + h[ 0] - h[ 1]); + q[ 6] = (- h[11] + h[13] - h[ 0] - h[ 3] + h[ 4]); + q[ 9] = (+ h[14] + h[ 0] - h[ 3] + h[ 6] - h[ 7]); + q[12] = (+ h[ 1] + h[ 3] - h[ 6] - h[ 9] + h[10]); + q[15] = (- h[ 4] - h[ 6] - h[ 9] + h[12] + h[13]); + q[ 2] = (+ h[ 7] + h[ 9] - h[12] + h[15] + h[ 0]); + q[ 5] = (+ h[10] - h[12] + h[15] - h[ 2] + h[ 3]); + q[ 8] = (+ h[13] - h[15] + h[ 2] - h[ 5] - h[ 6]); + q[11] = (- h[ 0] - h[ 2] - h[ 5] + h[ 8] + h[ 9]); + q[14] = (+ h[ 3] - h[ 5] + h[ 8] - h[11] - h[12]); + q[ 1] = (+ h[ 6] - h[ 8] + h[11] + h[14] - h[15]); + q[ 4] = (+ h[ 9] - h[11] - h[14] + h[ 1] + h[ 2]); + q[ 7] = (- h[12] - h[14] + h[ 1] - h[ 4] - h[ 5]); + q[10] = (+ h[15] - h[ 1] - h[ 4] - h[ 7] + h[ 8]); + q[13] = (+ h[ 2] + h[ 4] + h[ 7] + h[10] + h[11]); +*/ + ldr r4, [r3, #(5*4)] + ldr r5, [r3, #(7*4)] + ldr r6, [r3, #(10*4)] + ldr r7, [r3, #(13*4)] + ldr r8, [r3, #(14*4)] + ldr r9, [r3, #(8*4)] + ldr r10, [r3, #(11*4)] + subs r2, r4, r5 + adds r2, r2, r6 + adds r2, r2, r7 + adds r2, r2, r8 + str r2, [r0, #0] + ldr r4, [r3, #0] + ldr r5, [r3, #1] + subs r2, r9, r6 + adds r2, r2, r7 + adds r2, r2, r4 + subs r2, r2, r5 + add sp, sp, #64 + pop {r4-r11, pc} diff --git a/bmw/bmw_small_speed.s b/bmw/bmw_small_speed.s new file mode 100644 index 0000000..a4e2874 --- /dev/null +++ b/bmw/bmw_small_speed.s @@ -0,0 +1,1994 @@ + .file "bmw_small_speed.c" + .text + .align 2 + .type bmw_small_f1, %function +bmw_small_f1: + @ args = 0, pretend = 0, frame = 24 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + ldmib r0, {r5, r6} @ phole ldm + ldr r7, [r0, #0] + ldr r8, [r0, #12] + mov r9, r5, lsr #2 + mov r3, r6, lsr #2 + ldr sl, [r0, #16] + eor r9, r9, r5, asl #1 + eor r3, r3, r6, asl #2 + mov ip, r7, lsr #1 + ldr fp, [r0, #20] + eor r9, r9, r5, ror #20 + eor r3, r3, r6, ror #17 + eor ip, ip, r7, asl #2 + mov r4, r8, lsr #1 + eor r3, r3, r6, ror #3 + eor r9, r9, r5, ror #7 + ldr r6, [r0, #24] + eor ip, ip, r7, ror #24 + eor r4, r4, r8, asl #3 + mov r5, sl, lsr #1 + eor ip, ip, r7, ror #9 + add r9, r9, r3 + ldr r7, [r0, #28] + eor r4, r4, r8, ror #28 + eor r5, r5, sl, asl #2 + mov r3, fp, lsr #2 + eor r4, r4, r8, ror #13 + add r9, r9, ip + eor r5, r5, sl, ror #24 + eor r3, r3, fp, asl #1 + mov ip, r6, lsr #2 + eor r5, r5, sl, ror #9 + add r9, r9, r4 + eor r3, r3, fp, ror #20 + eor ip, ip, r6, asl #2 + mov r4, r7, lsr #1 + ldr r8, [r0, #32] + eor r3, r3, fp, ror #7 + add r9, r9, r5 + eor ip, ip, r6, ror #17 + eor r4, r4, r7, asl #3 + ldr sl, [r0, #36] + eor ip, ip, r6, ror #3 + add r9, r9, r3 + ldr r6, [r0, #44] + eor r4, r4, r7, ror #28 + ldr fp, [r0, #40] + eor r4, r4, r7, ror #13 + mov r5, r8, lsr #1 + ldr r7, [r0, #48] + add r9, r9, ip + eor r5, r5, r8, asl #2 + mov r3, sl, lsr #2 + add r9, r9, r4 + mov r4, r6, lsr #1 + eor r5, r5, r8, ror #24 + eor r3, r3, sl, asl #1 + mov ip, fp, lsr #2 + eor r4, r4, r6, asl #3 + mov r6, r7, lsr #1 + eor r5, r5, r8, ror #9 + eor r3, r3, sl, ror #20 + ldr r8, [r0, #44] + eor ip, ip, fp, asl #2 + eor r6, r6, r7, asl #2 + eor r3, r3, sl, ror #7 + add r9, r9, r5 + eor ip, ip, fp, ror #17 + eor r6, r6, r7, ror #24 + ldr sl, [r0, #52] + eor ip, ip, fp, ror #3 + eor r6, r6, r7, ror #9 + add r9, r9, r3 + ldr r7, [r0, #8] + eor r4, r4, r8, ror #28 + eor r4, r4, r8, ror #13 + add r9, r9, ip + mov r3, sl, lsr #2 + add r9, 
r9, r4 + mov r5, r7, lsr #2 + ldr r4, [r0, #60] + ldr r7, [r0, #4] + eor r3, r3, sl, asl #1 + add r9, r9, r6 + ldr r6, [r0, #52] + ldr ip, [r1, #0] + ldr fp, [r0, #56] + eor r3, r3, sl, ror #20 + mov sl, r4, lsr #1 + mov r4, r7, lsr #1 + ldr r7, .L3 + sub sp, sp, #24 + eor r3, r3, r6, ror #7 + str r3, [sp, #0] + mov r8, fp, lsr #2 + ldr r3, [r0, #60] + add r7, r7, ip, ror #31 + ldr ip, [r0, #56] + eor r8, r8, fp, asl #2 + ldr r6, [r0, #8] + eor r8, r8, ip, ror #17 + eor sl, sl, r3, asl #3 + ldr ip, [r0, #4] + ldr r3, [r0, #12] + eor r5, r5, r6, asl #1 + eor r6, r4, ip, asl #2 + mov ip, r3, lsr #2 + ldr r3, [sp, #0] + add r9, r9, r3 + ldr r3, [r0, #56] + ldr fp, [r1, #12] + eor r8, r8, r3, ror #3 + ldr r3, [r0, #8] + add r7, r7, fp, ror #28 + eor r5, r5, r3, ror #20 + ldr fp, [r0, #60] + ldr r3, [r0, #4] + ldr r4, [r1, #40] + eor sl, sl, fp, ror #28 + eor fp, r6, r3, ror #24 + ldr r6, [r0, #12] + ldr r3, [r0, #16] + eor ip, ip, r6, asl #2 + add r9, r9, r8 + mov r6, r3, lsr #1 + ldr r8, [r0, #60] + ldr r3, [r0, #8] + sub r7, r7, r4, ror #21 + ldr r4, [r0, #4] + eor sl, sl, r8, ror #13 + eor r5, r5, r3, ror #7 + eor r8, fp, r4, ror #9 + ldr r3, [r0, #16] + ldr fp, [r0, #12] + add r9, r9, sl + ldr sl, [r0, #16] + eor ip, ip, fp, ror #17 + eor r6, r6, r3, asl #3 + ldr fp, [r0, #20] + add r5, r5, r8 + ldr r8, [r0, #12] + ldr r3, [r2, #28] + eor r6, r6, sl, ror #28 + eor ip, ip, r8, ror #3 + eor r6, r6, sl, ror #13 + ldr r8, [r0, #24] + ldr sl, [r0, #20] + mov r4, fp, lsr #1 + eor r7, r7, r3 + eor r4, r4, fp, asl #2 + add r9, r9, r7 + mov r3, r8, lsr #2 + eor r4, r4, sl, ror #24 + ldr fp, [r0, #32] + ldr r7, [r0, #36] + eor r4, r4, sl, ror #9 + eor r3, r3, r8, asl #1 + ldr r8, [r0, #28] + str r9, [r0, #64] + ldr sl, [r0, #24] + add r5, r5, ip + add r5, r5, r6 + eor r3, r3, sl, ror #20 + eor r3, r3, sl, ror #7 + mov ip, r8, lsr #2 + add r5, r5, r4 + eor ip, ip, r8, asl #2 + mov r6, fp, lsr #1 + add r5, r5, r3 + ldr r3, [r0, #28] + eor ip, ip, r8, ror #17 + eor r6, r6, fp, asl #3 + eor ip, ip, r3, ror #3 + mov r8, r7, lsr #1 + ldr r3, [r0, #40] + eor r6, r6, fp, ror #28 + eor r6, r6, fp, ror #13 + eor r8, r8, r7, asl #2 + add r5, r5, ip + mov sl, r3, lsr #2 + eor r8, r8, r7, ror #24 + add r5, r5, r6 + ldr r6, [r0, #40] + ldr r4, [r1, #16] + eor r8, r8, r7, ror #9 + eor sl, sl, r3, asl #1 + ldr fp, [r0, #44] + ldr r3, [r1, #4] + eor sl, sl, r6, ror #20 + add r5, r5, r8 + ldr r6, [r0, #48] + ldr r8, [r0, #40] + mov r4, r4, ror #27 + eor sl, sl, r8, ror #7 + mov ip, fp, lsr #2 + mov r7, r6, lsr #1 + add r4, r4, r3, ror #30 + ldr r3, [r0, #44] + eor ip, ip, fp, asl #2 + eor r7, r7, r6, asl #3 + add r5, r5, sl + ldr r6, [r0, #52] + ldr sl, [r0, #44] + eor ip, ip, r3, ror #17 + ldr fp, [r0, #56] + eor ip, ip, sl, ror #3 + mov r3, r6, lsr #1 + add r6, r0, #48 + ldmia r6, {r6, sl} @ phole ldm + ldr r8, [r0, #60] + add r5, r5, ip + ldr ip, [r0, #48] + eor r7, r7, r6, ror #28 + eor r3, r3, sl, asl #2 + mov r6, fp, lsr #2 + add r4, r4, #1509949440 + eor r7, r7, ip, ror #13 + eor r3, r3, sl, ror #24 + eor r6, r6, fp, asl #1 + mov ip, r8, lsr #2 + add r4, r4, #11141120 + eor r3, r3, sl, ror #9 + add r5, r5, r7 + ldr sl, [r1, #44] + eor r6, r6, fp, ror #20 + eor ip, ip, r8, asl #2 + add r4, r4, #43520 + ldr r7, [r2, #32] + eor r6, r6, fp, ror #7 + add r5, r5, r3 + eor ip, ip, r8, ror #17 + mov r3, r9, lsr #1 + add r4, r4, #165 + eor ip, ip, r8, ror #3 + add r5, r5, r6 + sub r4, r4, sl, ror #20 + eor r3, r3, r9, asl #3 + eor r4, r4, r7 + add r5, r5, ip + eor r3, r3, r9, ror #28 + eor r3, r3, r9, ror #13 + add r5, 
r5, r4 + ldr ip, [r0, #12] + add r5, r5, r3 + ldr r3, [r0, #20] + str r5, [r0, #68] + ldr r4, [r0, #8] + ldr r6, [r0, #28] + ldr r8, [r0, #16] + mov ip, ip, ror #29 + ldr r7, [r0, #24] + ldr sl, [r0, #36] + add ip, ip, r3, ror #25 + add r8, r8, r4 + ldr r3, [r1, #20] + ldr r4, [r0, #32] + add ip, ip, r6, ror #19 + ldr r6, [r0, #44] + ldr fp, [r1, #8] + add r8, r8, r7 + add ip, ip, sl, ror #16 + ldr r7, [r0, #40] + add r9, r0, #48 + ldmia r9, {r9, sl} @ phole ldm + add r8, r8, r4 + add ip, ip, r6, ror #13 + ldr r4, [r0, #64] + ldr r6, [r0, #60] + mov r3, r3, ror #26 + add r8, r8, r7 + add ip, ip, sl, ror #9 + ldr r7, [r0, #56] + ldr sl, [r1, #48] + add r3, r3, fp, ror #29 + add r8, r8, r9 + add ip, ip, r6, ror #5 + eor r4, r4, r4, lsr #1 + ldr r6, [r2, #36] + sub r3, r3, #-1610612730 + add r8, r8, r7 + add ip, ip, r4 + sub r3, r3, sl, ror #19 + eor r3, r3, r6 + add ip, ip, r8 + add ip, ip, r3 + eor r5, r5, r5, lsr #2 + add ip, ip, r5 + str ip, [r0, #72] + ldr r5, [r0, #16] + ldr r4, [r0, #24] + ldr r3, [r1, #24] + ldr r6, [r0, #12] + ldr r7, [r0, #20] + mov r5, r5, ror #29 + ldr r9, [r1, #12] + add r5, r5, r4, ror #25 + add r4, r0, #28 + ldmia r4, {r4, sl} @ phole ldm + add r7, r7, r6 + mov r3, r3, ror #25 + add r6, r0, #36 + ldmia r6, {r6, fp} @ phole ldm + add r3, r3, r9, ror #28 + add r7, r7, r4 + ldr r9, [r0, #44] + add r5, r5, sl, ror #19 + add r7, r7, r6 + ldr sl, [r0, #48] + add r5, r5, fp, ror #16 + add r7, r7, r9 + add r3, r3, #1711276032 + ldr r9, [r0, #56] + ldr fp, [r0, #52] + ldr r6, [r0, #64] + add r5, r5, sl, ror #13 + ldr r4, [r0, #68] + sub r3, r3, #11141120 + ldr sl, [r0, #60] + add r5, r5, r9, ror #9 + sub r3, r3, #43520 + ldr r9, [r1, #52] + add r7, r7, fp + add r5, r5, r6, ror #5 + eor r4, r4, r4, lsr #1 + ldr r6, [r2, #40] + sub r3, r3, #177 + add r7, r7, sl + add r5, r5, r4 + sub r3, r3, r9, ror #18 + eor r3, r3, r6 + add r5, r5, r7 + add r5, r5, r3 + eor ip, ip, ip, lsr #2 + add r5, r5, ip + str r5, [r0, #76] + ldr ip, [r0, #20] + ldr r4, [r0, #28] + ldr r3, [r1, #28] + ldr r6, [r0, #36] + mov ip, ip, ror #29 + ldr sl, [r1, #16] + add ip, ip, r4, ror #25 + ldr r4, [r0, #44] + add ip, ip, r6, ror #19 + mov r3, r3, ror #24 + add r3, r3, sl, ror #27 + add ip, ip, r4, ror #16 + ldr sl, [r0, #60] + ldr r9, [r0, #68] + add ip, ip, fp, ror #13 + add r3, r3, #1778384896 + add ip, ip, sl, ror #9 + add r3, r3, #11141120 + ldr r4, [r0, #72] + add ip, ip, r9, ror #5 + add r3, r3, #43520 + ldr r9, [r1, #56] + ldr fp, [r0, #64] + ldr sl, [r2, #44] + ldr r6, [r0, #8] + add r3, r3, #164 + sub r3, r3, r9, ror #17 + eor r4, r4, r4, lsr #1 + eor r3, r3, sl + rsb r6, r6, fp + add ip, ip, r4 + add r6, r6, r8 + add ip, ip, r3 + add ip, ip, r6 + eor r5, r5, r5, lsr #2 + str r6, [sp, #4] + add ip, ip, r5 + ldr r4, [r0, #24] + str ip, [r0, #80] + ldr r5, [r0, #32] + ldr r6, [r0, #40] + mov r4, r4, ror #29 + ldr r3, [r1, #32] + add r4, r4, r5, ror #25 + ldr r5, [r0, #48] + ldr r8, [r1, #20] + add r4, r4, r6, ror #19 + ldr r6, [r0, #56] + add r4, r4, r5, ror #16 + mov r3, r3, ror #23 + ldr r9, [r1, #60] + add r4, r4, r6, ror #13 + add r3, r3, r8, ror #26 + ldr r5, [r0, #76] + ldr r8, [r0, #72] + ldr sl, [r2, #48] + add r4, r4, fp, ror #9 + ldr r6, [r0, #12] + ldr fp, [r0, #68] + sub r3, r3, #-1879048185 + add r4, r4, r8, ror #5 + sub r3, r3, r9, ror #16 + eor r5, r5, r5, lsr #1 + eor r3, r3, sl + rsb r6, r6, fp + add r4, r4, r5 + add fp, r6, r7 + add r4, r4, r3 + add r4, r4, fp + eor ip, ip, ip, lsr #2 + add r4, r4, ip + str r4, [r0, #84] + ldr r5, [r0, #28] + ldr r3, [r1, #36] + ldr r7, [r1, 
#24] + ldr r6, [r0, #36] + ldr ip, [r0, #44] + mov r5, r5, ror #29 + mov r3, r3, ror #22 + add r5, r5, r6, ror #25 + add r3, r3, r7, ror #25 + ldr r6, [r0, #52] + add r5, r5, ip, ror #19 + add r3, r3, #1979711488 + ldr ip, [r0, #60] + add r5, r5, r6, ror #16 + sub r3, r3, #11141120 + ldr r6, [r0, #68] + ldr sl, [r1, #0] + ldr r8, [r0, #76] + add r5, r5, ip, ror #13 + sub r3, r3, #43520 + ldr ip, [r0, #80] + ldr r9, [r0, #72] + ldr r7, [r2, #52] + add r5, r5, r6, ror #9 + sub r3, r3, #178 + ldr r6, [r0, #16] + add r5, r5, r8, ror #5 + sub r3, r3, sl, ror #31 + eor ip, ip, ip, lsr #1 + ldr sl, [sp, #4] + eor r3, r3, r7 + rsb r6, r6, r9 + add r5, r5, ip + add r6, r6, sl + add r5, r5, r3 + add r5, r5, r6 + eor r4, r4, r4, lsr #2 + add r5, r5, r4 + str r5, [r0, #88] + ldr ip, [r0, #32] + ldr r3, [r1, #40] + ldr r7, [r1, #28] + ldr r4, [r0, #40] + str r6, [sp, #8] + mov ip, ip, ror #29 + ldr r6, [r0, #48] + mov r3, r3, ror #21 + add ip, ip, r4, ror #25 + add r3, r3, r7, ror #24 + ldr r4, [r0, #56] + add ip, ip, r6, ror #19 + add r3, r3, #2063597568 + ldr r6, [r0, #64] + add ip, ip, r4, ror #16 + sub r3, r3, #5570560 + ldr sl, [r1, #4] + ldr r8, [r0, #80] + add ip, ip, r6, ror #13 + ldr r4, [r0, #84] + sub r3, r3, #21760 + ldr r7, [r2, #56] + ldr r6, [r0, #20] + add ip, ip, r9, ror #9 + sub r3, r3, #93 + ldr r9, [r0, #76] + add ip, ip, r8, ror #5 + sub r3, r3, sl, ror #30 + eor r4, r4, r4, lsr #1 + eor r3, r3, r7 + rsb r9, r6, r9 + add ip, ip, r4 + add r9, r9, fp + add ip, ip, r3 + add ip, ip, r9 + eor r5, r5, r5, lsr #2 + add ip, ip, r5 + ldr r4, [r0, #36] + str ip, [r0, #92] + ldr r5, [r0, #44] + ldr r6, [r0, #52] + mov r4, r4, ror #29 + ldr r3, [r1, #44] + add r4, r4, r5, ror #25 + ldr r5, [r0, #60] + ldr r7, [r1, #32] + add r4, r4, r6, ror #19 + ldr r6, [r0, #68] + ldr r8, [r0, #76] + add r4, r4, r5, ror #16 + mov r3, r3, ror #20 + ldr sl, [r1, #8] + ldr fp, [r0, #80] + add r4, r4, r6, ror #13 + add r3, r3, r7, ror #23 + ldr r5, [r0, #88] + ldr r7, [r0, #84] + ldr r6, [r0, #24] + add r4, r4, r8, ror #9 + sub r3, r3, #-2147483640 + ldr r8, [r2, #60] + add r4, r4, r7, ror #5 + sub r3, r3, sl, ror #29 + eor r5, r5, r5, lsr #1 + rsb r6, r6, fp + ldr fp, [sp, #8] + eor r3, r3, r8 + add r4, r4, r5 + add r6, r6, fp + add r4, r4, r3 + add r4, r4, r6 + eor ip, ip, ip, lsr #2 + add r4, r4, ip + str r4, [r0, #96] + ldr r5, [r0, #40] + ldr r3, [r1, #48] + ldr r7, [r1, #36] + str r6, [sp, #12] + ldr r6, [r0, #48] + ldr ip, [r0, #56] + mov r5, r5, ror #29 + mov r3, r3, ror #19 + add r5, r5, r6, ror #25 + add r3, r3, r7, ror #22 + ldr r6, [r0, #64] + add r5, r5, ip, ror #19 + add r3, r3, #-2063597568 + ldr ip, [r0, #72] + add r5, r5, r6, ror #16 + add r3, r3, #5570560 + ldr r6, [r0, #80] + ldr sl, [r1, #12] + ldr r8, [r0, #88] + add r5, r5, ip, ror #13 + add r3, r3, #21760 + ldr ip, [r0, #92] + ldr fp, [r0, #84] + ldr r7, [r2, #0] + add r5, r5, r6, ror #9 + add r3, r3, #77 + ldr r6, [r0, #28] + add r5, r5, r8, ror #5 + sub r3, r3, sl, ror #28 + eor ip, ip, ip, lsr #1 + eor r3, r3, r7 + rsb r6, r6, fp + add r5, r5, ip + add r6, r6, r9 + add r5, r5, r3 + add r5, r5, r6 + eor r4, r4, r4, lsr #2 + add r5, r5, r4 + str r5, [r0, #100] + ldr ip, [r0, #44] + ldr r3, [r1, #52] + ldr r4, [r0, #52] + ldr r7, [r1, #40] + str r6, [sp, #16] + mov ip, ip, ror #29 + ldr r6, [r0, #60] + add ip, ip, r4, ror #25 + mov r3, r3, ror #18 + ldr r4, [r0, #68] + add r3, r3, r7, ror #21 + add ip, ip, r6, ror #19 + ldr r6, [r0, #76] + add ip, ip, r4, ror #16 + add r3, r3, #-1979711488 + ldr r8, [r0, #92] + add ip, ip, r6, ror #13 + 
ldr r4, [r0, #96] + add r3, r3, #11141120 + ldr sl, [r1, #16] + add ip, ip, fp, ror #9 + add r3, r3, #43520 + ldr r7, [r2, #4] + ldr r6, [r0, #32] + ldr r9, [r0, #88] + add ip, ip, r8, ror #5 + eor r4, r4, r4, lsr #1 + add r3, r3, #162 + sub r3, r3, sl, ror #27 + add ip, ip, r4 + ldr r4, [sp, #12] + eor r3, r3, r7 + rsb r9, r6, r9 + add r9, r9, r4 + add ip, ip, r3 + add ip, ip, r9 + eor r5, r5, r5, lsr #2 + add ip, ip, r5 + ldr r4, [r0, #48] + str ip, [r0, #104] + ldr r5, [r0, #56] + ldr r6, [r0, #64] + mov r4, r4, ror #29 + ldr r3, [r1, #56] + add r4, r4, r5, ror #25 + ldr r5, [r0, #72] + ldr r7, [r1, #44] + add r4, r4, r6, ror #19 + ldr r6, [r0, #80] + ldr r8, [r0, #88] + add r4, r4, r5, ror #16 + mov r3, r3, ror #17 + add r4, r4, r6, ror #13 + add r3, r3, r7, ror #20 + ldr r5, [r0, #100] + ldr r7, [r0, #96] + ldr sl, [r1, #20] + add r4, r4, r8, ror #9 + ldr fp, [r0, #92] + ldr r8, [r2, #8] + ldr r6, [r0, #36] + add r4, r4, r7, ror #5 + eor r5, r5, r5, lsr #1 + sub r3, r3, #1879048201 + sub r3, r3, sl, ror #26 + add r4, r4, r5 + ldr r5, [sp, #16] + eor r3, r3, r8 + rsb r6, r6, fp + add r6, r6, r5 + add r4, r4, r3 + add r4, r4, r6 + eor ip, ip, ip, lsr #2 + add r4, r4, ip + str r4, [r0, #108] + ldr r5, [r0, #52] + ldr r3, [r1, #60] + ldr r7, [r1, #48] + str r6, [sp, #20] + ldr r6, [r0, #60] + ldr ip, [r0, #68] + mov r5, r5, ror #29 + mov r3, r3, ror #16 + add r5, r5, r6, ror #25 + add r3, r3, r7, ror #19 + ldr r6, [r0, #76] + add r5, r5, ip, ror #19 + add r3, r3, #-1795162112 + ldr ip, [r0, #84] + add r5, r5, r6, ror #16 + add r3, r3, #5570560 + ldr sl, [r1, #24] + ldr r8, [r0, #100] + add r5, r5, ip, ror #13 + add r3, r3, #21760 + ldr ip, [r0, #104] + ldr r7, [r2, #12] + add r5, r5, fp, ror #9 + ldr r6, [r0, #40] + ldr fp, [r0, #96] + add r3, r3, #76 + add r5, r5, r8, ror #5 + sub r3, r3, sl, ror #25 + eor ip, ip, ip, lsr #1 + eor r3, r3, r7 + rsb r6, r6, fp + add r5, r5, ip + add fp, r6, r9 + add r5, r5, r3 + eor r4, r4, r4, lsr #2 + ldr ip, [r0, #56] + add r5, r5, fp + add r5, r5, r4 + ldr r3, [r0, #64] + str r5, [r0, #112] + ldr r4, [r0, #72] + mov ip, ip, ror #29 + ldr r6, [r0, #80] + add ip, ip, r3, ror #25 + ldr r7, [r1, #0] + add ip, ip, r4, ror #19 + ldr r3, .L3+4 + ldr r4, [r0, #88] + ldr sl, [r1, #52] + add ip, ip, r6, ror #16 + ldr r6, [r0, #96] + ldr r9, [r1, #28] + add ip, ip, r4, ror #13 + add r3, r3, r7, ror #31 + ldr r4, [r0, #108] + ldr r7, [r0, #104] + ldr r8, [r2, #16] + add ip, ip, r6, ror #9 + add r3, r3, sl, ror #18 + ldr r6, [r0, #44] + ldr sl, [r0, #100] + add ip, ip, r7, ror #5 + sub r3, r3, r9, ror #24 + ldr r7, [sp, #20] + eor r4, r4, r4, lsr #1 + rsb r6, r6, sl + eor r3, r3, r8 + add ip, ip, r4 + add r9, r6, r7 + add ip, ip, r3 + ldr r4, [r0, #68] + ldr r3, [r0, #60] + eor r5, r5, r5, lsr #2 + add ip, ip, r9 + add ip, ip, r5 + ldr r6, [r0, #104] + mov r4, r4, ror #25 + str ip, [r0, #116] + add r4, r4, r3, ror #29 + ldr r5, [r0, #76] + add r4, r4, r6 + ldr r7, [r0, #84] + ldr r3, [r1, #4] + add r4, r4, r5, ror #19 + ldr r5, [r0, #92] + ldr r8, [r1, #56] + add r4, r4, r7, ror #16 + add r4, r4, r5, ror #13 + ldr r7, [r0, #108] + mov r3, r3, ror #30 + add r4, r4, sl, ror #9 + add r3, r3, r8, ror #17 + ldr sl, [r1, #32] + ldr r6, [r0, #48] + ldr r5, [r0, #112] + add r4, r4, r7, ror #5 + sub r3, r3, #1610612746 + ldr r7, [r2, #20] + rsb r4, r6, r4 + eor r5, r5, r5, lsr #1 + sub r3, r3, sl, ror #23 + eor r3, r3, r7 + add r4, r4, r5 + add r4, r4, r3 + eor ip, ip, ip, lsr #2 + add r4, r4, fp + add r4, r4, ip + str r4, [r0, #120] + ldr r3, [r0, #72] + ldr r5, [r0, #64] 
+ ldr ip, [r1, #8] + ldr r6, [r0, #108] + mov r3, r3, ror #25 + ldr r8, [r1, #60] + add r3, r3, r5, ror #29 + ldr r7, [r0, #80] + add r3, r3, r6 + ldr r5, [r0, #88] + mov ip, ip, ror #29 + add r3, r3, r7, ror #19 + add ip, ip, r8, ror #16 + ldr r6, [r0, #96] + add r3, r3, r5, ror #16 + add ip, ip, #-1526726656 + ldr r5, [r0, #104] + add r3, r3, r6, ror #13 + add ip, ip, #5570560 + ldr r6, [r0, #112] + ldr r8, [r1, #36] + add r3, r3, r5, ror #9 + add ip, ip, #21760 + ldr r1, [r0, #116] + ldr r5, [r0, #52] + ldr r7, [r2, #24] + add r3, r3, r6, ror #5 + add ip, ip, #75 + rsb r3, r5, r3 + sub ip, ip, r8, ror #22 + eor r1, r1, r1, lsr #1 + add r3, r3, r1 + eor ip, ip, r7 + add r3, r3, ip + add r3, r3, r9 + eor r4, r4, r4, lsr #2 + add r3, r3, r4 + str r3, [r0, #124] + add sp, sp, #24 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + bx lr +.L4: + .align 2 +.L3: + .word 1431655760 + .word -1700091231 + .size bmw_small_f1, .-bmw_small_f1 + .align 2 + .global bmw_small_f0 + .type bmw_small_f0, %function +bmw_small_f0: + @ args = 0, pretend = 0, frame = 84 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + ldr r3, [r1, #28] + ldr r7, [r2, #28] + sub sp, sp, #84 + ldr fp, [r2, #20] + ldr r9, [r2, #40] + ldr sl, [r2, #52] + ldr r4, [r1, #52] + eor r3, r7, r3 + ldr r6, [r1, #20] + ldr r5, [r1, #40] + ldr r8, [r2, #56] + str r3, [sp, #76] + ldr ip, [r1, #56] + eor sl, sl, r4 + eor r5, r9, r5 + eor r6, fp, r6 + ldr r4, [sp, #76] + ldr r9, [r2, #32] + str r5, [sp, #72] + eor ip, r8, ip + add r3, r5, r6 + ldr r5, [r1, #32] + rsb r8, r4, ip + add r3, r3, sl + add r3, r3, r8 + eor r5, r9, r5 + str r5, [sp, #56] + str r6, [sp, #68] + ldr r5, [r1, #4] + ldr r6, [r2, #4] + str ip, [sp, #80] + mov ip, r3, lsr #1 + eor ip, ip, r3, asl #3 + eor r6, r5, r6 + eor ip, ip, r3, ror #28 + str r6, [sp, #44] + eor ip, ip, r3, ror #13 + ldr r7, [r2, #0] + ldr r4, [r1, #0] + ldr r6, [sp, #56] + str ip, [sp, #4] + ldr r3, [sp, #44] + ldr ip, [sp, #72] + eor r7, r7, r4 + ldr r5, [r1, #16] + add r4, r6, sl + add r6, ip, r3 + ldr ip, [r2, #16] + eor r5, ip, r5 + ldr r3, [sp, #4] + str r5, [sp, #48] + ldr r5, [r1, #4] + ldr fp, [r2, #44] + add ip, r3, r5 + ldr r3, [r1, #44] + eor fp, fp, r3 + ldr r9, [r1, #12] + str fp, [sp, #64] + add r3, r2, #8 + ldmia r3, {r3, r5} @ phole ldm + eor fp, r5, r9 + ldr r5, [r1, #8] + str r3, [sp, #40] + ldr r3, [r2, #24] + str r5, [sp, #36] + ldr r5, [r1, #24] + str r3, [sp, #32] + ldr r3, [r2, #36] + str r5, [sp, #28] + ldr r5, [r1, #36] + str r3, [sp, #24] + ldr r3, [r2, #48] + ldr r9, [r2, #60] + str r5, [sp, #20] + ldr r2, [r1, #60] + ldr r5, [r1, #48] + add r4, r4, r7 + rsb r4, r6, r4 + str r3, [sp, #16] + ldr r3, [sp, #48] + str ip, [r0, #0] + str r5, [sp, #12] + str r2, [sp, #8] + ldr r5, [sp, #64] + mov r2, r4, lsr #2 + rsb ip, r7, r3 + eor r2, r2, r4, asl #2 + rsb r3, r5, sl + rsb ip, fp, ip + ldr r5, [r1, #16] + eor r2, r2, r4, ror #17 + add ip, ip, r3 + eor r2, r2, r4, ror #3 + add r2, r2, r5 + mov r3, ip, lsr #1 + str r2, [r0, #12] + eor r3, r3, ip, asl #2 + eor r3, r3, ip, ror #24 + ldr r2, [r1, #28] + ldr r4, [sp, #28] + eor r3, r3, ip, ror #9 + ldr ip, [sp, #32] + add r3, r3, r2 + eor ip, ip, r4 + rsb r2, fp, ip + str ip, [sp, #52] + str r3, [r0, #24] + ldr ip, [sp, #20] + ldr r5, [sp, #24] + add r8, r8, r2 + eor r5, r5, ip + ldr r3, [r1, #40] + add r8, r8, r7 + rsb r6, r5, r6 + eor r8, r8, r8, lsr #1 + rsb r6, r2, r6 + add r8, r8, r3 + str r5, [sp, #60] + mov r2, r6, lsr #2 + str r8, [r0, #36] + ldr r3, 
[sp, #16] + ldr r4, [sp, #12] + eor r2, r2, r6, asl #1 + eor r8, r3, r4 + ldr r5, [sp, #52] + ldr r3, [r1, #52] + ldr ip, [sp, #60] + eor r2, r2, r6, ror #20 + eor r2, r2, r6, ror #7 + rsb sl, r5, sl + add r2, r2, r3 + rsb r5, ip, r8 + ldr r3, [sp, #48] + add r4, sl, r5 + ldr r6, [sp, #8] + rsb r4, r3, r4 + str r2, [r0, #48] + eor r9, r9, r6 + mov r3, r4, lsr #1 + ldr r6, [sp, #76] + eor r3, r3, r4, asl #3 + ldr r2, [r1, #0] + add ip, r9, r6 + eor r3, r3, r4, ror #28 + eor r3, r3, r4, ror #13 + add ip, ip, r7 + rsb ip, r5, ip + add r3, r3, r2 + str r3, [r0, #60] + ldr r4, [sp, #36] + ldr r3, [sp, #40] + mov r2, ip, lsr #2 + eor r2, r2, ip, asl #1 + eor r6, r3, r4 + eor r2, r2, ip, ror #20 + rsb r5, r6, r9 + rsb fp, r8, fp + eor r2, r2, ip, ror #7 + ldr ip, [sp, #72] + ldr r3, [r1, #12] + add r4, r5, fp + add r4, r4, ip + add r2, r2, r3 + mov r3, r4, lsr #1 + eor r3, r3, r4, asl #3 + eor r3, r3, r4, ror #28 + str r3, [sp, #0] + ldr r3, [sp, #68] + str r2, [r0, #8] + rsb ip, r3, #0 + ldr r3, [sp, #0] + ldr r2, [r1, #24] + eor r4, r3, r4, ror #13 + add r3, r4, r2 + add sl, ip, sl + rsb sl, r5, sl + str r3, [r0, #20] + ldr r5, [sp, #56] + ldr r3, [sp, #68] + mov r2, sl, lsr #2 + rsb r4, r3, r5 + ldr r5, [sp, #60] + eor r2, r2, sl, asl #2 + ldr r3, [r1, #36] + rsb r7, r7, r5 + eor r2, r2, sl, ror #17 + add r7, r7, r4 + eor r2, r2, sl, ror #3 + rsb r7, r6, r7 + add r2, r2, r3 + str r2, [r0, #32] + mov r3, r7, lsr #1 + eor r3, r3, r7, asl #2 + ldr r2, [r1, #48] + eor r3, r3, r7, ror #24 + eor r3, r3, r7, ror #9 + add r3, r3, r2 + ldr r7, [sp, #64] + ldr r5, [sp, #80] + str r3, [r0, #44] + add r3, r4, fp + rsb r2, r7, r3 + add r4, r5, r7 + ldr r7, [sp, #56] + add r9, r9, r7 + ldr r7, [sp, #52] + ldr r3, [r1, #60] + eor r5, r2, r2, lsr #1 + add r2, r4, r7 + add r3, r5, r3 + rsb r5, r9, r2 + str r3, [r0, #56] + mov r3, r5, lsr #1 + eor r3, r3, r5, asl #2 + ldr r2, [r1, #8] + eor r3, r3, r5, ror #24 + eor r3, r3, r5, ror #9 + add r3, r3, r2 + str r3, [r0, #4] + ldr r3, [sp, #44] + ldr r5, [sp, #60] + add r2, r6, r3 + ldr r7, [sp, #80] + add r2, r2, r5 + rsb ip, r7, ip + rsb r2, r4, r2 + ldr r4, [sp, #48] + add ip, ip, r3 + rsb ip, r4, ip + ldr r3, [r1, #20] + rsb ip, r8, ip + eor r2, r2, r2, lsr #1 + add r2, r2, r3 + mov r3, ip, lsr #2 + eor r3, r3, ip, asl #1 + str r2, [r0, #16] + eor r3, r3, ip, ror #20 + ldr r5, [sp, #76] + eor r3, r3, ip, ror #7 + ldr ip, [sp, #44] + ldr r2, [r1, #32] + rsb r9, ip, r9 + mov r7, r4 + add r4, r5, r4 + rsb r9, r4, r9 + add r3, r3, r2 + str r3, [r0, #28] + mov r2, r9, lsr #1 + ldr r3, [sp, #64] + eor r2, r2, r9, asl #3 + ldr r5, [sp, #72] + add ip, r3, r6 + eor r2, r2, r9, ror #28 + ldr r3, [r1, #44] + eor r2, r2, r9, ror #13 + add r6, ip, r5 + add r6, r6, r4 + add r2, r2, r3 + str r2, [r0, #40] + mov r3, r6, lsr #2 + eor r3, r3, r6, asl #2 + ldr r2, [r1, #56] + eor r3, r3, r6, ror #17 + eor r3, r3, r6, ror #3 + add r3, r3, r2 + str r3, [r0, #52] + add sp, sp, #84 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp} + bx lr + .size bmw_small_f0, .-bmw_small_f0 + .align 2 + .global bmw224_init + .type bmw224_init, %function +bmw224_init: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + mov r3, #65536 + add r3, r3, #512 + add r3, r3, #3 + str r3, [r0, #0] + mov r2, r0 + add r1, r0, #60 +.L8: + ldr r3, [r2, #0] + add r3, r3, #67108864 + add r3, r3, #262144 + add r3, r3, #1024 + add r3, r3, #4 + str r3, [r2, #4]! 
+ cmp r2, r1 + bne .L8 + mov r3, #0 + str r3, [r0, #64] + bx lr + .size bmw224_init, .-bmw224_init + .align 2 + .global bmw256_init + .type bmw256_init, %function +bmw256_init: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + ldr r3, .L16 + str r3, [r0, #0] + mov r2, r0 + add r1, r0, #60 +.L13: + ldr r3, [r2, #0] + add r3, r3, #67108864 + add r3, r3, #262144 + add r3, r3, #1024 + add r3, r3, #4 + str r3, [r2, #4]! + cmp r2, r1 + bne .L13 + mov r3, #0 + str r3, [r0, #64] + bx lr +.L17: + .align 2 +.L16: + .word 1078018627 + .size bmw256_init, .-bmw256_init + .align 2 + .global bmw256_ctx2hash + .type bmw256_ctx2hash, %function +bmw256_ctx2hash: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + add r1, r1, #32 + mov r2, #32 + b memcpy + .size bmw256_ctx2hash, .-bmw256_ctx2hash + .align 2 + .global bmw224_ctx2hash + .type bmw224_ctx2hash, %function +bmw224_ctx2hash: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + add r1, r1, #36 + mov r2, #28 + b memcpy + .size bmw224_ctx2hash, .-bmw224_ctx2hash + .align 2 + .global bmw_small_nextBlock + .type bmw_small_nextBlock, %function +bmw_small_nextBlock: + @ args = 0, pretend = 0, frame = 192 + @ frame_needed = 0, uses_anonymous_args = 0 + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr} + sub sp, sp, #192 + mov r7, r0 + mov r8, r1 + add r4, sp, #64 + mov r0, r4 + mov r1, r7 + mov r2, r8 + bl bmw_small_f0 + mov r0, r4 + mov r1, r8 + mov r2, r7 + bl bmw_small_f1 + add r1, sp, #128 + ldmia r1, {r1, r2, r3, ip} @ phole ldm + str ip, [sp, #48] + ldr lr, [sp, #168] + eor ip, r1, r2 + str r1, [sp, #60] + ldr r1, [sp, #144] + str r2, [sp, #56] + ldr r2, [sp, #172] + str lr, [sp, #28] + str r1, [sp, #44] + str r3, [sp, #52] + ldr r1, [sp, #176] + eor ip, ip, r3 + ldr r3, [sp, #48] + ldr fp, [sp, #164] + str r2, [sp, #24] + ldr lr, [sp, #148] + ldr r2, [sp, #28] + str r1, [sp, #20] + eor ip, ip, r3 + ldr r1, [sp, #152] + ldr r3, [sp, #44] + str lr, [sp, #40] + eor r0, r2, fp + ldr lr, [sp, #24] + ldr r2, [sp, #180] + str r1, [sp, #36] + eor ip, ip, r3 + ldr r1, [sp, #184] + ldr r3, [sp, #40] + str r2, [sp, #16] + eor r0, r0, lr + ldr lr, [sp, #20] + eor ip, ip, r3 + str r1, [sp, #12] + ldr r3, [sp, #16] + ldr r1, [sp, #188] + eor r0, r0, lr + ldr r2, [sp, #36] + ldr lr, [sp, #156] + eor r0, r0, r3 + str r1, [sp, #8] + ldr r3, [sp, #12] + eor ip, ip, r2 + eor r0, r0, r3 + ldr r2, [sp, #160] + eor ip, ip, lr + ldr r9, [r8, #0] + eor r0, r0, r1 + add r1, sp, #60 + ldmia r1, {r1, r3} @ phole ldm + str r2, [sp, #32] + eor r2, ip, r2 + eor r0, r0, r2 + eor r9, r9, r1, lsr #5 + eor r2, r2, r3 + eor r9, r9, r0, asl #5 + add r9, r9, r2 + str r9, [r7, #0] + ldr r3, [sp, #68] + ldr sl, [r8, #4] + ldr r2, [sp, #56] + eor r3, r3, fp + eor sl, sl, r2, asl #8 + eor r3, r3, ip + eor sl, sl, r0, lsr #7 + add sl, sl, r3 + str sl, [r7, #4] + ldr r3, [sp, #72] + ldr r1, [sp, #28] + ldr r2, [r8, #8] + eor r3, r3, r1 + ldr r1, [sp, #52] + eor r2, r2, r1, asl #5 + eor r2, r2, r0, lsr #5 + eor r3, r3, ip + add r3, r2, r3 + str r3, [r7, #8] + ldr r1, [sp, #24] + str r3, [sp, #4] + ldr r3, [sp, #76] + ldr r2, [r8, #12] + eor r3, r3, r1 + ldr r1, [sp, #48] + eor r2, r2, r1, asl #5 + eor r2, r2, r0, lsr #1 + eor r3, r3, ip + add r3, r2, r3 + str r3, [r7, #12] + ldr r2, [sp, #20] + str r3, [sp, #0] + ldr r3, [sp, #80] + ldr r1, [r8, #16] + eor r3, r3, r2 + ldr r2, [sp, #44] + eor 
r1, r1, r2 + eor r3, r3, ip + eor r1, r1, r0, lsr #3 + add r1, r1, r3 + str r1, [r7, #16] + ldr r3, [sp, #84] + ldr r2, [sp, #16] + ldr r4, [r8, #20] + eor r3, r3, r2 + ldr r2, [sp, #40] + eor r4, r4, r2, lsr #6 + eor r3, r3, ip + eor r4, r4, r0, asl #6 + add r4, r4, r3 + str r4, [r7, #20] + ldr r3, [sp, #36] + ldr r5, [r8, #24] + ldr r2, [sp, #12] + eor r5, r5, r3, asl #6 + ldr r3, [sp, #88] + eor r3, r3, r2 + eor r3, r3, ip + eor r5, r5, r0, lsr #4 + add r5, r5, r3 + str r5, [r7, #24] + ldr r6, [r8, #28] + ldr r3, [sp, #92] + ldr r2, [sp, #8] + eor r6, r6, lr, asl #2 + eor r3, r3, r2 + eor r3, r3, ip + eor r6, r6, r0, lsr #11 + add r6, r6, r3 + str r6, [r7, #28] + ldr r3, [sp, #96] + ldr r2, [r8, #32] + eor r3, r3, lr + ldr lr, [sp, #32] + eor r3, r3, ip, asl #8 + eor r2, r2, lr + add r3, r3, r1, ror #23 + eor r2, r2, r0 + add r3, r3, r2 + str r3, [r7, #32] + ldr r3, [sp, #100] + ldr r1, [sp, #60] + ldr r2, [r8, #36] + eor r3, r3, r1 + eor r3, r3, ip, lsr #6 + eor r2, r2, fp + eor r2, r2, r0 + add r3, r3, r4, ror #22 + add r3, r3, r2 + str r3, [r7, #36] + ldr r3, [sp, #104] + ldr lr, [sp, #56] + ldr r2, [r8, #40] + ldr r1, [sp, #28] + eor r3, r3, lr + eor r2, r2, r1 + eor r3, r3, ip, asl #6 + eor r2, r2, r0 + add r3, r3, r5, ror #21 + add r3, r3, r2 + str r3, [r7, #40] + ldr r3, [sp, #108] + ldr lr, [sp, #52] + ldr r2, [r8, #44] + ldr r1, [sp, #24] + eor r3, r3, lr + eor r2, r2, r1 + eor r3, r3, ip, asl #4 + eor r2, r2, r0 + add r3, r3, r6, ror #20 + add r3, r3, r2 + str r3, [r7, #44] + ldr r3, [sp, #112] + ldr lr, [sp, #48] + ldr r2, [r8, #48] + ldr r1, [sp, #20] + eor r3, r3, lr + eor r2, r2, r1 + eor r3, r3, ip, lsr #3 + eor r2, r2, r0 + add r3, r3, r9, ror #19 + add r3, r3, r2 + str r3, [r7, #48] + ldr r3, [sp, #116] + ldr lr, [sp, #44] + ldr r2, [r8, #52] + ldr r1, [sp, #16] + eor r3, r3, lr + eor r2, r2, r1 + eor r3, r3, ip, lsr #4 + eor r2, r2, r0 + add r3, r3, sl, ror #18 + add r3, r3, r2 + str r3, [r7, #52] + ldr lr, [sp, #40] + ldr r3, [sp, #120] + ldr r1, [sp, #12] + ldr r2, [r8, #56] + eor r3, r3, lr + ldr lr, [sp, #4] + eor r2, r2, r1 + eor r3, r3, ip, lsr #7 + add r3, r3, lr, ror #17 + eor r2, r2, r0 + add r3, r3, r2 + str r3, [r7, #56] + ldr r3, [sp, #124] + ldr r2, [sp, #36] + eor r3, r3, r2 + ldr r1, [r8, #60] + eor r3, r3, ip, lsr #2 + ldr ip, [sp, #8] + ldr lr, [sp, #0] + ldr r2, [r7, #64] + eor r1, r1, ip + add r3, r3, lr, ror #16 + eor r1, r1, r0 + add r3, r3, r1 + add r2, r2, #1 + str r2, [r7, #64] + str r3, [r7, #60] + add sp, sp, #192 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc} + .size bmw_small_nextBlock, .-bmw_small_nextBlock + .align 2 + .global bmw224_nextBlock + .type bmw224_nextBlock, %function +bmw224_nextBlock: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + b bmw_small_nextBlock + .size bmw224_nextBlock, .-bmw224_nextBlock + .align 2 + .global bmw256_nextBlock + .type bmw256_nextBlock, %function +bmw256_nextBlock: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. 
+ b bmw_small_nextBlock + .size bmw256_nextBlock, .-bmw256_nextBlock + .align 2 + .global bmw_small_lastBlock + .type bmw_small_lastBlock, %function +bmw_small_lastBlock: + @ args = 0, pretend = 0, frame = 400 + @ frame_needed = 0, uses_anonymous_args = 0 + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr} + mov r2, r2, asl #16 + mov ip, #508 + mov r4, r2, lsr #16 + add ip, ip, #3 + cmp r4, ip + sub sp, sp, #400 + mov fp, r0 + mov r8, r1 + bls .L29 + mov r7, ip + mov r6, r1 + mov r5, r4 +.L30: + mov r1, r6 + mov r0, fp + bl bmw_small_nextBlock + sub r3, r5, #512 + mov r3, r3, asl #16 + mov r5, r3, lsr #16 + cmp r5, r7 + add r6, r6, #64 + bhi .L30 + sub r2, r4, #512 + mov r2, r2, asl #16 + mov r3, r2, lsr #25 + add r3, r3, #1 + mov r2, r2, lsr #16 + mov r3, r3, asl #6 + mov r4, r2, asl #23 + add r8, r8, r3 + mov r4, r4, lsr #23 +.L29: + add r9, sp, #336 + mov r1, #0 + mov r2, #64 + mov r0, r9 + bl memset + add r2, r4, #7 + mov r1, r8 + mov r2, r2, asr #3 + mov r0, r9 + bl memcpy + add r0, sp, #400 + add ip, r0, r4, lsr #3 + ldrb r2, [ip, #-64] @ zero_extendqisi2 + and r1, r4, #7 + mov r3, #128 + orr r2, r2, r3, asr r1 + cmp r4, #448 + strb r2, [ip, #-64] + bge .L37 + add r2, sp, #348 + add r3, sp, #352 + add r7, sp, #356 + add r8, sp, #360 + add ip, sp, #364 + str r2, [sp, #12] + str r3, [sp, #8] + str r7, [sp, #4] + str r8, [sp, #20] + str ip, [sp, #16] + ldr r1, [fp, #64] + add r5, sp, #340 + add r6, sp, #344 +.L31: + mov r2, #512 + mov r3, r4 + mov r4, #0 + umlal r3, r4, r2, r1 + mov r0, fp + mov r1, r9 + str r3, [r9, #56] + str r4, [r9, #60] + bl bmw_small_nextBlock + mov r2, #64 + mov r0, r9 + mov r1, #170 + bl memset + mov r2, #0 + mov r3, #160 +.L32: + strb r3, [r9, r2] + add r2, r2, #4 + add r3, r3, #1 + cmp r2, #64 + and r3, r3, #255 + bne .L32 + add r4, sp, #208 + mov r0, r4 + mov r1, r9 + mov r2, fp + bl bmw_small_f0 + mov r0, r4 + mov r1, fp + mov r2, r9 + bl bmw_small_f1 + add r8, sp, #272 + ldmia r8, {r8, ip, lr} @ phole ldm + add r2, sp, #308 + ldmia r2, {r2, r3} @ phole ldm + eor r1, ip, r8 + str ip, [sp, #200] + ldr ip, [sp, #292] + ldr r0, [sp, #284] + str r8, [sp, #204] + ldr r8, [sp, #316] + ldr r7, [sp, #288] + str ip, [sp, #184] + str lr, [sp, #196] + eor ip, r3, r2 + eor r1, r1, lr + ldr lr, [sp, #320] + str r2, [sp, #172] + eor r1, r1, r0 + ldr r2, [sp, #324] + eor ip, ip, r8 + str r3, [sp, #168] + str r7, [sp, #188] + ldr r3, [sp, #184] + eor r1, r1, r7 + eor ip, ip, lr + ldr r7, [sp, #328] + eor ip, ip, r2 + eor r1, r1, r3 + str r7, [sp, #152] + eor ip, ip, r7 + ldr r3, [fp, #0] + ldr r7, [sp, #204] + str r0, [sp, #192] + str r8, [sp, #164] + ldr r0, [sp, #296] + ldr r8, [sp, #332] + str lr, [sp, #160] + add sl, sp, #300 + ldmia sl, {sl, lr} @ phole ldm + eor r3, r3, r7, lsr #5 + str r0, [sp, #180] + str r2, [sp, #156] + eor r1, r1, r0 + ldr r2, [fp, #4] + str r8, [sp, #148] + str lr, [sp, #176] + eor ip, ip, r8 + str r3, [sp, #144] + ldr r8, [sp, #200] + eor r1, r1, sl + eor r0, r1, lr + ldr r3, [sp, #172] + eor lr, r2, r8, asl #8 + ldr r2, [sp, #212] + ldr r7, [sp, #144] + eor r2, r3, r2 + ldr r3, [sp, #208] + eor ip, ip, r0 + eor r7, r7, ip, asl #5 + eor r0, r0, r3 + eor r2, r2, r1 + eor r3, lr, ip, lsr #7 + add r0, r7, r0 + add r2, r3, r2 + str r0, [r9, #0] + str r2, [r5, #0] + ldr r8, [fp, #8] + ldr r3, [sp, #216] + ldr lr, [sp, #168] + str r0, [sp, #136] + ldr r0, [sp, #196] + eor r3, lr, r3 + eor r8, r8, r0, asl #5 + eor r3, r3, r1 + eor r8, r8, ip, lsr #5 + add r8, r8, r3 + str r8, [r6, #0] + ldr r7, [fp, #12] + ldr r3, [sp, #220] + ldr r5, [sp, #192] + str 
r2, [sp, #132] + ldr r2, [sp, #164] + eor r7, r7, r5, asl #5 + eor r3, r2, r3 + eor r3, r3, r1 + ldr lr, [sp, #12] + eor r7, r7, ip, lsr #1 + add r7, r7, r3 + str r7, [lr, #0] + ldr r6, [fp, #16] + ldr r3, [sp, #224] + ldr r0, [sp, #160] + ldr r2, [sp, #188] + eor r3, r0, r3 + eor r6, r2, r6 + eor r3, r3, r1 + eor r6, r6, ip, lsr #3 + add r6, r6, r3 + ldr r3, [sp, #8] + str r6, [r3, #0] + ldr r5, [fp, #20] + ldr lr, [sp, #184] + ldr r3, [sp, #228] + ldr r0, [sp, #156] + eor r5, r5, lr, lsr #6 + eor r3, r0, r3 + eor r3, r3, r1 + ldr r2, [sp, #4] + eor r5, r5, ip, asl #6 + add r5, r5, r3 + str r5, [r2, #0] + ldr lr, [sp, #152] + ldr r0, [sp, #180] + ldr r4, [fp, #24] + ldr r3, [sp, #232] + eor r4, r4, r0, asl #6 + eor r3, lr, r3 + eor r3, r3, r1 + eor r4, r4, ip, lsr #4 + ldr r2, [sp, #20] + add r4, r4, r3 + str r4, [r2, #0] + ldr r0, [fp, #28] + ldr r3, [sp, #236] + ldr lr, [sp, #148] + eor r0, r0, sl, asl #2 + eor r3, lr, r3 + eor r3, r3, r1 + ldr r2, [sp, #16] + eor r0, r0, ip, lsr #11 + add r0, r0, r3 + str r0, [r2, #0] + ldr r3, [sp, #240] + ldr r2, [sp, #244] + eor sl, sl, r3 + ldr r3, [sp, #204] + ldr lr, [sp, #200] + eor r2, r3, r2 + ldr r3, [sp, #248] + eor r3, lr, r3 + str r2, [sp, #124] + str r3, [sp, #112] + ldr r2, [sp, #252] + ldr r3, [sp, #196] + ldr lr, [sp, #192] + eor r2, r3, r2 + ldr r3, [sp, #256] + eor r3, lr, r3 + str r2, [sp, #100] + str r3, [sp, #88] + ldr r2, [sp, #260] + ldr r3, [sp, #188] + ldr lr, [sp, #184] + eor r2, r3, r2 + ldr r3, [sp, #264] + eor r3, lr, r3 + str r2, [sp, #72] + str r3, [sp, #52] + ldr r2, [sp, #268] + ldr r3, [sp, #180] + eor lr, r3, r2 + ldr r3, [fp, #32] + ldr r2, [sp, #176] + eor r2, r2, r3 + str r2, [sp, #128] + ldr r3, [fp, #36] + ldr r2, [sp, #172] + eor r2, r2, r3 + str r2, [sp, #116] + ldr r3, [fp, #40] + ldr r2, [sp, #168] + eor r2, r2, r3 + str r2, [sp, #104] + ldr r3, [fp, #44] + ldr r2, [sp, #164] + eor r2, r2, r3 + str r2, [sp, #92] + ldr r3, [fp, #48] + ldr r2, [sp, #160] + eor r2, r2, r3 + str r2, [sp, #76] + ldr r3, [fp, #52] + ldr r2, [sp, #156] + eor r2, r2, r3 + str r2, [sp, #60] + ldr r3, [fp, #56] + ldr r2, [sp, #152] + eor lr, lr, r1, lsr #2 + eor r2, r2, r3 + str r2, [sp, #44] + str lr, [sp, #36] + ldr r2, [fp, #60] + ldr lr, [sp, #148] + ldr r3, [sp, #124] + eor lr, lr, r2 + ldr r2, [sp, #112] + str lr, [sp, #28] + eor lr, r3, r1, lsr #6 + ldr r3, [sp, #100] + eor r2, r2, r1, asl #6 + str r2, [sp, #0] + eor r2, r3, r1, asl #4 + ldr r3, [sp, #88] + eor r3, r3, r1, lsr #3 + str r3, [sp, #84] + ldr r3, [sp, #72] + eor r3, r3, r1, lsr #4 + str r3, [sp, #68] + ldr r3, [sp, #52] + add lr, lr, r5, ror #22 + add r2, r2, r0, ror #20 + ldr r5, [sp, #0] + ldr r0, [sp, #136] + eor sl, sl, r1, asl #8 + str lr, [sp, #120] + eor r1, r3, r1, lsr #7 + str r2, [sp, #96] + ldr r3, [sp, #132] + ldr lr, [sp, #84] + ldr r2, [sp, #68] + add r4, r5, r4, ror #21 + add lr, lr, r0, ror #19 + add r2, r2, r3, ror #18 + str r4, [sp, #108] + str lr, [sp, #80] + str r2, [sp, #64] + ldr r5, [sp, #36] + add r7, r5, r7, ror #16 + str r7, [sp, #32] + ldr r7, [sp, #28] + add r1, r1, r8, ror #17 + ldr r0, [sp, #116] + ldr r8, [sp, #128] + ldr r2, [sp, #104] + ldr r3, [sp, #92] + eor r7, r7, ip + str r7, [sp, #24] + eor lr, r8, ip + eor r5, r0, ip + eor r8, r3, ip + eor r7, r2, ip + ldr r0, [sp, #76] + ldr r3, [sp, #44] + ldr r2, [sp, #60] + add sl, sl, r6, ror #23 + eor r2, r2, ip + eor r6, r0, ip + add sl, sl, lr + eor ip, r3, ip + ldr lr, [sp, #108] + str ip, [sp, #40] + str r1, [sp, #48] + str r2, [sp, #56] + ldr ip, [sp, #120] + add r7, lr, r7 + mov 
r4, r9 + ldmia r4!, {r0, r1, r2, r3} + str r7, [r9, #40] + ldr r7, [sp, #96] + add r5, ip, r5 + ldr lr, [sp, #56] + ldr ip, [sp, #64] + str r5, [r9, #36] + add r5, r7, r8 + ldr r8, [sp, #80] + add r7, r8, r6 + add r6, ip, lr + ldr ip, [sp, #48] + ldr lr, [sp, #40] + add r8, ip, lr + ldr ip, [sp, #32] + ldr lr, [sp, #24] + add ip, ip, lr + str ip, [r9, #60] + mov ip, fp + str sl, [r9, #32] + str r5, [r9, #44] + str r7, [r9, #48] + str r6, [r9, #52] + str r8, [r9, #56] + stmia ip!, {r0, r1, r2, r3} + ldmia r4!, {r0, r1, r2, r3} + stmia ip!, {r0, r1, r2, r3} + ldmia r4!, {r0, r1, r2, r3} + stmia ip!, {r0, r1, r2, r3} + ldmia r4, {r0, r1, r2, r3} + stmia ip, {r0, r1, r2, r3} + add sp, sp, #400 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc} +.L37: + mov r0, fp + mov r1, r9 + bl bmw_small_nextBlock + ldr r1, [fp, #64] + mov r3, #0 + str r3, [sp, #388] + str r3, [sp, #336] + str r3, [sp, #340] + str r3, [sp, #344] + str r3, [sp, #348] + str r3, [sp, #352] + str r3, [sp, #356] + str r3, [sp, #360] + str r3, [sp, #364] + str r3, [sp, #368] + str r3, [sp, #372] + str r3, [sp, #376] + str r3, [sp, #380] + str r3, [sp, #384] + sub r1, r1, #1 + add lr, sp, #348 + add r0, sp, #352 + add r2, sp, #356 + add r3, sp, #360 + add r7, sp, #364 + str r1, [fp, #64] + add r5, sp, #340 + add r6, sp, #344 + str lr, [sp, #12] + str r0, [sp, #8] + str r2, [sp, #4] + str r3, [sp, #20] + str r7, [sp, #16] + b .L31 + .size bmw_small_lastBlock, .-bmw_small_lastBlock + .align 2 + .global bmw256_lastBlock + .type bmw256_lastBlock, %function +bmw256_lastBlock: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + mov r2, r2, asl #16 + mov r2, r2, lsr #16 + b bmw_small_lastBlock + .size bmw256_lastBlock, .-bmw256_lastBlock + .align 2 + .global bmw224_lastBlock + .type bmw224_lastBlock, %function +bmw224_lastBlock: + @ args = 0, pretend = 0, frame = 0 + @ frame_needed = 0, uses_anonymous_args = 0 + @ link register save eliminated. + mov r2, r2, asl #16 + mov r2, r2, lsr #16 + b bmw_small_lastBlock + .size bmw224_lastBlock, .-bmw224_lastBlock + .align 2 + .global bmw256 + .type bmw256, %function +bmw256: + @ args = 0, pretend = 0, frame = 68 + @ frame_needed = 0, uses_anonymous_args = 0 + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr} + ldr r3, .L49 + sub sp, sp, #68 + add r6, sp, #68 + str r3, [r6, #-68]! + mov sl, r1 + mov r7, r2 + mov r9, r0 + mov r2, sp + add r1, sp, #60 +.L43: + ldr r3, [r2, #0] + add r3, r3, #67108864 + add r3, r3, #262144 + add r3, r3, #1024 + add r3, r3, #4 + str r3, [r2, #4]! 
+ cmp r2, r1 + bne .L43 + mov r2, #508 + add r2, r2, #3 + mov r3, #0 + cmp r7, r2 + str r3, [sp, #64] + bls .L44 + mov r8, r2 + mov r5, sl + mov r4, r7 +.L45: + mov r1, r5 + sub r4, r4, #512 + mov r0, sp + bl bmw_small_nextBlock + cmp r4, r8 + add r5, r5, #64 + bhi .L45 + sub r2, r7, #512 + mov r3, r2, lsr #9 + add r3, r3, #1 + mov r3, r3, asl #6 + mov r7, r2, asl #23 + add sl, sl, r3 + mov r7, r7, lsr #23 +.L44: + mov r2, r7, asl #16 + mov r1, sl + mov r0, sp + mov r2, r2, lsr #16 + bl bmw_small_lastBlock + mov r0, r9 + add r1, sp, #32 + mov r2, #32 + bl memcpy + add sp, sp, #68 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, pc} +.L50: + .align 2 +.L49: + .word 1078018627 + .size bmw256, .-bmw256 + .align 2 + .global bmw224 + .type bmw224, %function +bmw224: + @ args = 0, pretend = 0, frame = 68 + @ frame_needed = 0, uses_anonymous_args = 0 + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr} + mov r3, #65536 + sub sp, sp, #68 + add r3, r3, #512 + add r3, r3, #3 + add r6, sp, #68 + str r3, [r6, #-68]! + mov sl, r1 + mov r7, r2 + mov r9, r0 + mov r2, sp + add r1, sp, #60 +.L52: + ldr r3, [r2, #0] + add r3, r3, #67108864 + add r3, r3, #262144 + add r3, r3, #1024 + add r3, r3, #4 + str r3, [r2, #4]! + cmp r2, r1 + bne .L52 + mov r2, #508 + add r2, r2, #3 + mov r3, #0 + cmp r7, r2 + str r3, [sp, #64] + bls .L53 + mov r8, r2 + mov r5, sl + mov r4, r7 +.L54: + mov r1, r5 + sub r4, r4, #512 + mov r0, sp + bl bmw_small_nextBlock + cmp r4, r8 + add r5, r5, #64 + bhi .L54 + sub r2, r7, #512 + mov r3, r2, lsr #9 + add r3, r3, #1 + mov r3, r3, asl #6 + mov r7, r2, asl #23 + add sl, sl, r3 + mov r7, r7, lsr #23 +.L53: + mov r2, r7, asl #16 + mov r1, sl + mov r0, sp + mov r2, r2, lsr #16 + bl bmw_small_lastBlock + mov r0, r9 + add r1, sp, #36 + mov r2, #28 + bl memcpy + add sp, sp, #68 + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, pc} + .size bmw224, .-bmw224 + .ident "GCC: (GNU) 4.3.2" diff --git a/bmw/bmw_small_speed_asm_f0.S b/bmw/bmw_small_speed_asm_f0.S new file mode 100644 index 0000000..001eafa --- /dev/null +++ b/bmw/bmw_small_speed_asm_f0.S @@ -0,0 +1,118 @@ +/* bmw_small_speed_asm_f0.S */ +/* + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + + .syntax unified + +.macro S32_0 out:req in:req + lsr \out, \in, #1 + eor \out, \out, \in, LSL #3 + eor \out, \out, \in, ROR #28 + eor \out, \out, \in, ROR #13 +.endm + +.macro S32_1 out:req in:req + lsr \out, \in, #1 + eor \out, \out, \in, LSL #2 + eor \out, \out, \in, ROR #24 + eor \out, \out, \in, ROR #9 +.endm + +.macro S32_2 out:req in:req + lsr \out, \in, #2 + eor \out, \out, \in, LSL #1 + eor \out, \out, \in, ROR #20 + eor \out, \out, \in, ROR #7 +.endm + +.macro S32_3 out:req in:req + lsr \out, \in, #2 + eor \out, \out, \in, LSL #2 + eor \out, \out, \in, ROR #17 + eor \out, \out, \in, ROR #3 +.endm + +.macro S32_4 in:req + eor \in, \in, \in, LSR #1 +.endm + +.macro S32_5 in:req + eor \in, \in, \in, LSR #2 +.endm + +#define T00_ADDR [SP, #(15-3)*4] +#define T01_ADDR [SP, #(15-2)*4] +#define T02_ADDR [SP, #(15-1)*4] +#define T03_ADDR [SP, #(15-0)*4] +#define T04_ADDR [SP, #(15-7)*4] +#define T05_ADDR [SP, #(15-6)*4] +#define T06_ADDR [SP, #(15-5)*4] +#define T07_ADDR [SP, #(15-4)*4] +#define T08_ADDR [SP, #(15-11)*4] +#define T09_ADDR [SP, #(15-10)*4] +#define T10_ADDR [SP, #(15-9)*4] +#define T11_ADDR [SP, #(15-8)*4] +#define T12_ADDR [SP, #(15-15)*4] +#define T13_ADDR [SP, #(15-14)*4] +#define T14_ADDR [SP, #(15-13)*4] +#define T15_ADDR [SP, #(15-12)*4] + + .text + .align 2 + .thumb + .thumb_func + .type bmw_small_f0, %function + .global bmw_small_f0 +bmw_small_f0: + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + /* memxor(, h, m, 16) */ + ldmia r1!, {r4, r5, r6, r7} + ldmia r2!, {r8, r9, r10, r11} + eor r4, r8 + eor r5, r9 + eor r6, r10 + eor r7, r11 + push {r4, r5, r6, r7} + ldmia r1!, {r4, r5, r6, r7} + ldmia r2!, {r8, r9, r10, r11} + eor r4, r8 + eor r5, r9 + eor r6, r10 + eor r7, r11 + push {r4, r5, r6, r7} + ldmia r1!, {r4, r5, r6, r7} + ldmia r2!, {r8, r9, r10, r11} + eor r4, r8 + eor r5, r9 + eor r6, r10 + eor r7, r11 + push {r4, r5, r6, r7} + ldmia r1!, {r4, r5, r6, r7} + ldmia r2!, {r8, r9, r10, r11} + eor r4, r8 + eor r5, r9 + eor r6, r10 + eor r7, r11 + push {r4, r5, r6, r7} + sub r1, #16*4 + +#include "f0_small_autogen.i" + + add SP, #16*4 + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, PC} + diff --git a/bmw/bmw_small_speed_asmmix_f0.S b/bmw/bmw_small_speed_asmmix_f0.S new file mode 100644 index 0000000..8225271 --- /dev/null +++ b/bmw/bmw_small_speed_asmmix_f0.S @@ -0,0 +1,71 @@ +/* bmw_small_speed_asm_f0.S */ +/* + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + + .syntax unified + +.macro S32_0 out:req in:req + lsr \out, \in, #1 + eor \out, \out, \in, LSL #3 + eor \out, \out, \in, ROR #28 + eor \out, \out, \in, ROR #13 +.endm + +.macro S32_1 out:req in:req + lsr \out, \in, #1 + eor \out, \out, \in, LSL #2 + eor \out, \out, \in, ROR #24 + eor \out, \out, \in, ROR #9 +.endm + +.macro S32_2 out:req in:req + lsr \out, \in, #2 + eor \out, \out, \in, LSL #1 + eor \out, \out, \in, ROR #20 + eor \out, \out, \in, ROR #7 +.endm + +.macro S32_3 out:req in:req + lsr \out, \in, #2 + eor \out, \out, \in, LSL #2 + eor \out, \out, \in, ROR #17 + eor \out, \out, \in, ROR #3 +.endm + +.macro S32_4 in:req + eor \in, \in, \in, LSR #1 +.endm + +.macro S32_5 in:req + eor \in, \in, \in, LSR #2 +.endm + + + .text + .align 2 + .thumb + .thumb_func + .type bmw_small_f0, %function + .global bmw_small_f0 +bmw_small_f0: + push {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14} + +#include "f0_small_autogen_mix.i" + + pop {r4, r5, r6, r7, r8, r9, r10, r11, r12, PC} + diff --git a/bmw/bmw_small_speed_cstub.c b/bmw/bmw_small_speed_cstub.c new file mode 100644 index 0000000..b455bc2 --- /dev/null +++ b/bmw/bmw_small_speed_cstub.c @@ -0,0 +1,339 @@ +/* bmw_small.c */ +/* + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + * \file bmw_small.c + * \author Daniel Otte + * \email daniel.otte@rub.de + * \date 2009-04-27 + * \license GPLv3 or later + * + */ + +#include +#include +#include "bmw_small.h" + +#define SHL32(a,n) ((a)<<(n)) +#define SHR32(a,n) ((a)>>(n)) +#define ROTL32(a,n) (((a)<<(n))|((a)>>(32-(n)))) +#define ROTR32(a,n) (((a)>>(n))|((a)<<(32-(n)))) + + +#define DEBUG 0 + + +#if DEBUG + #include "cli.h" + + void ctx_dump(const bmw_small_ctx_t* ctx){ + uint8_t i; + cli_putstr("\r\n==== ctx dump ===="); + for(i=0; i<16;++i){ + cli_putstr("\r\n h["); + cli_hexdump(&i, 1); + cli_putstr("] = "); + cli_hexdump_rev(&(ctx->h[i]), 4); + } + cli_putstr("\r\n counter = "); + cli_hexdump(&(ctx->counter), 4); + } + + void dump_x(const uint32_t* q, uint8_t elements, char x){ + uint8_t i; + cli_putstr("\r\n==== "); + cli_putc(x); + cli_putstr(" dump ===="); + for(i=0; i. +*/ + +static inline +void bmw_small_f1(uint32_t* q, const uint32_t* m, const uint32_t* h){ + uint32_t even, odd; + uint32x4_t qq16, qq20, qq24, qq28; + uint32x4_t qm0, qm1, qm2; + uint32x4_t qk={0x55555550UL, 0x5aaaaaa5UL, 0x5ffffffaUL, 0x6555554fUL}; + uint32x4_t qkadd={0x15555554UL, 0x15555554UL, 0x15555554UL, 0x15555554UL}; + uint32x2_t dtmp0; + uint32x4x2_t q2tmp0, q2tmp1; + + /* addElement for q16 .. 
q19 */ + qm0 = *((uint32x4_t*)&(m[ 0])); + qm1 = *((uint32x4_t*)&(m[ 3])); + qm2 = *((uint32x4_t*)&(m[10])); + qm0 = veorq_u32(vshlq_u32(qm0,(int32x4_t){ 1, 2, 3, 4}),vshlq_u32(qm0,(int32x4_t){-31, -30, -29, -28})); + qm1 = veorq_u32(vshlq_u32(qm1,(int32x4_t){ 4, 5, 6, 7}),vshlq_u32(qm1,(int32x4_t){-28, -27, -26, -25})); + qm2 = veorq_u32(vshlq_u32(qm2,(int32x4_t){11, 12, 13, 14}),vshlq_u32(qm2,(int32x4_t){-21, -20, -19, -18})); + qq16 = veorq_u32(vaddq_u32(vaddq_u32(qm0, qm1),vsubq_u32(qk, qm2)), *((uint32x4_t*)&(h[ 7]))); + qk = vaddq_u32(qk, qkadd); + + /* addElement for q20 .. q23 */ + qm0 = *((uint32x4_t*)&(m[ 4])); + qm1 = *((uint32x4_t*)&(m[ 7])); + qm2 = *((uint32x4_t*)&(m[14])); + qm0 = veorq_u32(vshlq_u32(qm0,(int32x4_t){ 5, 6, 7, 8}),vshlq_u32(qm0,(int32x4_t){-27, -26, -25, -24})); + qm1 = veorq_u32(vshlq_u32(qm1,(int32x4_t){ 8, 9, 10, 11}),vshlq_u32(qm1,(int32x4_t){-24, -23, -22, -21})); + qm2 = veorq_u32(vshlq_u32(qm2,(int32x4_t){15, 16, 1, 2}),vshlq_u32(qm2,(int32x4_t){-17, -16, -31, -30})); + qq20 = veorq_u32(vaddq_u32(vaddq_u32(qm0, qm1),vsubq_u32(qk, qm2)), *((uint32x4_t*)&(h[11]))); + qk = vaddq_u32(qk, qkadd); + + /* addElement for q24 .. q27 */ + qm0 = *((uint32x4_t*)&(m[ 8])); + qm1 = *((uint32x4_t*)&(m[11])); + qm2 = *((uint32x4_t*)&(m[18])); + qm0 = veorq_u32(vshlq_u32(qm0,(int32x4_t){ 9, 10, 11, 12}),vshlq_u32(qm0,(int32x4_t){-23, -22, -21, -20})); + qm1 = veorq_u32(vshlq_u32(qm1,(int32x4_t){12, 13, 14, 15}),vshlq_u32(qm1,(int32x4_t){-20, -19, -18, -17})); + qm2 = veorq_u32(vshlq_u32(qm2,(int32x4_t){ 3, 4, 5, 6}),vshlq_u32(qm2,(int32x4_t){-29, -28, -27, -26})); + qq24 = veorq_u32(vaddq_u32(vaddq_u32(qm0, qm1),vsubq_u32(qk, qm2)), *((uint32x4_t*)&(h[15]))); + qk = vaddq_u32(qk, qkadd); + + /* addElement for q28 .. q31 */ + qm0 = *((uint32x4_t*)&(m[12])); + qm1 = *((uint32x4_t*)&(m[15])); + qm2 = *((uint32x4_t*)&(m[22])); + qm0 = veorq_u32(vshlq_u32(qm0,(int32x4_t){13, 14, 15, 16}),vshlq_u32(qm0,(int32x4_t){-19, -18, -17, -16})); + qm1 = veorq_u32(vshlq_u32(qm1,(int32x4_t){16, 1, 2, 3}),vshlq_u32(qm1,(int32x4_t){-16, -31, -30, -29})); + qm2 = veorq_u32(vshlq_u32(qm2,(int32x4_t){ 7, 8, 9, 10}),vshlq_u32(qm2,(int32x4_t){-25, -24, -23, -22})); + qq28 = veorq_u32(vaddq_u32(vaddq_u32(qm0, qm1),vsubq_u32(qk, qm2)), *((uint32x4_t*)&(h[ 3]))); + qk = vaddq_u32(qk, qkadd); + + /* expand1( 0) */ + qm0 = *((uint32x4_t*)&(q[ 0])); + qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm0 = *((uint32x4_t*)&(q[ 4])); + qm2 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + qm0 = *((uint32x4_t*)&(q[ 8])); + qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + qm0 = *((uint32x4_t*)&(q[12])); 
+ qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + dtmp0 = vadd_u32(vget_high_u32(qm2), vget_low_u32(qm2)); + q[16] = vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1) + vgetq_lane_u32(qq16, 0); + + /* expand1( 1) */ + qm0 = *((uint32x4_t*)&(q[ 1])); + qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm0 = *((uint32x4_t*)&(q[ 5])); + qm2 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + qm0 = *((uint32x4_t*)&(q[ 9])); + qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + qm0 = *((uint32x4_t*)&(q[13])); + qm1 = veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ -1, -2, -2, -1}), + vshlq_u32(qm0,(int32x4_t){ 2, 1, 2, 3})), + veorq_u32(veorq_u32(vshlq_u32(qm0,(int32x4_t){ 8, 12, 15, 4}), + vshlq_u32(qm0,(int32x4_t){-24,-20,-17,-28})), + veorq_u32(vshlq_u32(qm0,(int32x4_t){ 23, 25, 29, 19}), + vshlq_u32(qm0,(int32x4_t){ -9, -7, -3,-13})))); + qm2 = vaddq_u32(qm2, qm1); + dtmp0 = vadd_u32(vget_high_u32(qm2), vget_low_u32(qm2)); + q[17] = vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1) + vgetq_lane_u32(qq16, 1); + + /* expand2( 2) */ + q2tmp0 = vld2q_u32(&q[ 2]); + q2tmp1 = vld2q_u32(&q[10]); + q2tmp1.val[0] = vsetq_lane_u32(0, q2tmp1.val[0], 3); + q2tmp0.val[0] = vaddq_u32(q2tmp0.val[0], q2tmp1.val[0]); + dtmp0 = vadd_u32(vget_high_u32(q2tmp0.val[0]), vget_low_u32(q2tmp0.val[0])); + even = vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + q[18] = even + ((q[16]>>1)|q[16]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[18] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 3) */ + q2tmp0 = vld2q_u32(&q[ 3]); + q2tmp1 = vld2q_u32(&q[11]); + q2tmp1.val[0] = vsetq_lane_u32(0, q2tmp1.val[0], 3); + q2tmp0.val[0] = vaddq_u32(q2tmp0.val[0], q2tmp1.val[0]); + dtmp0 = vadd_u32(vget_high_u32(q2tmp0.val[0]), vget_low_u32(q2tmp0.val[0])); + odd = vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + q[19] = odd + ((q[17]>>1)|q[17]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = 
veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[19] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 4) */ + q2tmp0 = vld2q_u32(&q[ 4]); + q2tmp1 = vld2q_u32(&q[12]); + even += q[16] - q[ 2]; + q[20] = even + ((q[18]>>1)|q[18]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[20] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 5) */ + q2tmp0 = vld2q_u32(&q[ 5]); + q2tmp1 = vld2q_u32(&q[13]); + odd += q[17] - q[ 3]; + q[21] = odd + ((q[19]>>1)|q[19]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[21] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 6) */ + q2tmp0 = vld2q_u32(&q[ 6]); + q2tmp1 = vld2q_u32(&q[14]); + even += q[18] - q[ 4]; + q[22] = even + ((q[20]>>1)|q[20]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[22] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 7) */ + q2tmp0 = vld2q_u32(&q[ 7]); + q2tmp1 = vld2q_u32(&q[15]); + odd += q[19] - q[ 5]; + q[23] = odd + ((q[21]>>1)|q[21]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[23] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 8) */ + q2tmp0 = vld2q_u32(&q[ 8]); + q2tmp1 = vld2q_u32(&q[16]); + even += q[20] - q[ 6]; + q[24] = even + ((q[22]>>1)|q[22]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[24] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2( 9) */ + q2tmp0 = vld2q_u32(&q[ 9]); + q2tmp1 = vld2q_u32(&q[17]); + odd += q[21] - q[ 7]; + q[25] = odd + ((q[23]>>1)|q[23]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[25] += vget_lane_u32(dtmp0, 
0) + vget_lane_u32(dtmp0, 1); + + /* expand2(10) */ + q2tmp0 = vld2q_u32(&q[10]); + q2tmp1 = vld2q_u32(&q[18]); + even += q[22] - q[ 8]; + q[26] = even + ((q[24]>>1)|q[24]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[26] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2(11) */ + q2tmp0 = vld2q_u32(&q[11]); + q2tmp1 = vld2q_u32(&q[19]); + odd += q[23] - q[ 9]; + q[27] = odd + ((q[25]>>1)|q[25]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[27] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2(12) */ + q2tmp0 = vld2q_u32(&q[12]); + q2tmp1 = vld2q_u32(&q[20]); + even += q[24] - q[10]; + q[28] = even + ((q[26]>>1)|q[26]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[28] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2(13) */ + q2tmp0 = vld2q_u32(&q[13]); + q2tmp1 = vld2q_u32(&q[21]); + odd += q[25] - q[11]; + q[29] = odd + ((q[27]>>1)|q[27]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[29] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2(14) */ + q2tmp0 = vld2q_u32(&q[14]); + q2tmp1 = vld2q_u32(&q[22]); + even += q[26] - q[12]; + q[30] = even + ((q[28]>>1)|q[28]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[30] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); + + /* expand2(15) */ + q2tmp0 = vld2q_u32(&q[15]); + q2tmp1 = vld2q_u32(&q[23]); + odd += q[27] - q[13]; + q[31] = odd + ((q[29]>>1)|q[29]); + qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}), + vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16})); + qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}), + vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2})); + qm1 = vaddq_u32(qm1, qm0); + dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1)); + q[31] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1); +} + +/* END of automatic generated code */ + diff --git a/bmw/gen_f0_arm.rb b/bmw/gen_f0_arm.rb new file mode 100644 index 0000000..c1e2797 --- /dev/null +++ b/bmw/gen_f0_arm.rb @@ -0,0 +1,59 @@ +# gen_f0_arm.rb 
+=begin
+    This file is part of the ARM-Crypto-Lib.
+    Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+=end
+=begin
+#define S32_1(x) ( (SHR32((x),  1)) ^ \
+                   (SHL32((x),  2)) ^ \
+                   (ROTL32((x),  8)) ^ \
+                   (ROTR32((x),  9)) )
+
+#define S32_2(x) ( (SHR32((x),  2)) ^ \
+                   (SHL32((x),  1)) ^ \
+                   (ROTL32((x), 12)) ^ \
+                   (ROTR32((x),  7)) )
+
+#define S32_3(x) ( (SHR32((x),  2)) ^ \
+                   (SHL32((x),  2)) ^ \
+                   (ROTL32((x), 15)) ^ \
+                   (ROTR32((x),  3)) )
+
+#define S32_4(x) ( (SHR32((x), 1)) ^ (x))
+
+#define S32_5(x) ( (SHR32((x), 2)) ^ (x))
+
+=end
+# shift/rotate amounts for S32_0 .. S32_3 as [shr, shl, rotl, rotr]
+$s32_0_lut = [ 1,  3,  4, 13]
+$s32_1_lut = [ 1,  2,  8,  9]
+$s32_2_lut = [ 2,  1, 12,  7]
+$s32_3_lut = [ 2,  2, 15,  3]
+
+$s32_lut = [$s32_0_lut, $s32_1_lut, $s32_2_lut, $s32_3_lut]
+
+# Emit the ARM instruction sequence for S32_<select> applied to reg0;
+# reg1 is used as a scratch register (only needed for select 0..3).
+def s32_0(fout, select, reg0, reg1)
+  if select<=3
+    fout.printf("\tmov %s, %s\n", reg1, reg0)
+    fout.printf("\tlsrs %s, %s, #%d\n", reg0, reg0, $s32_lut[select][0])
+    fout.printf("\teor %s, %s, %s, lsl #%d\n", reg0, reg0, reg1, $s32_lut[select][1])
+    fout.printf("\teor %s, %s, %s, ror #%d\n", reg0, reg0, reg1, 32-$s32_lut[select][2])
+    fout.printf("\teor %s, %s, %s, ror #%d\n", reg0, reg0, reg1, $s32_lut[select][3])
+  else
+    fout.printf("\teor %s, %s, %s, lsr #%d\n", reg0, reg0, reg0, 1) if select==4
+    fout.printf("\teor %s, %s, %s, lsr #%d\n", reg0, reg0, reg0, 2) if select==5
+  end
+end
+
-- 
2.39.5
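
Reference note (placed after the patch trailer, so it is not part of the diff): the S32_* macros in bmw_small_speed_asm_f0.S / bmw_small_speed_asmmix_f0.S and the $s32_lut tables in gen_f0_arm.rb all encode the same BMW-224/256 s-functions. Below is a minimal C sketch of those functions for cross-checking the shift and rotate amounts; it is an illustration, not code from the library. The macro names mirror the SHL32/SHR32/ROTL32/ROTR32 helpers defined in bmw_small_speed_cstub.c, and the function names s32_0..s32_5 are only illustrative.

    #include <stdint.h>

    #define SHL32(a,n)  ((uint32_t)(a) << (n))
    #define SHR32(a,n)  ((uint32_t)(a) >> (n))
    #define ROTL32(a,n) (((uint32_t)(a) << (n)) | ((uint32_t)(a) >> (32 - (n))))
    #define ROTR32(a,n) (((uint32_t)(a) >> (n)) | ((uint32_t)(a) << (32 - (n))))

    /* s-functions of BMW-224/256; the first four follow the pattern
       shr a ^ shl b ^ rotl c ^ rotr d, which is exactly the [a, b, c, d]
       layout of the $s32_*_lut entries in gen_f0_arm.rb. */
    static inline uint32_t s32_0(uint32_t x){ return SHR32(x, 1) ^ SHL32(x, 3) ^ ROTL32(x,  4) ^ ROTR32(x, 13); }
    static inline uint32_t s32_1(uint32_t x){ return SHR32(x, 1) ^ SHL32(x, 2) ^ ROTL32(x,  8) ^ ROTR32(x,  9); }
    static inline uint32_t s32_2(uint32_t x){ return SHR32(x, 2) ^ SHL32(x, 1) ^ ROTL32(x, 12) ^ ROTR32(x,  7); }
    static inline uint32_t s32_3(uint32_t x){ return SHR32(x, 2) ^ SHL32(x, 2) ^ ROTL32(x, 15) ^ ROTR32(x,  3); }
    static inline uint32_t s32_4(uint32_t x){ return SHR32(x, 1) ^ x; }
    static inline uint32_t s32_5(uint32_t x){ return SHR32(x, 2) ^ x; }

In the hand-written assembler the ROTL32 amounts appear as ROR #(32-c), which is why gen_f0_arm.rb emits "ror #(32 - lut[2])" for the third term.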