X-Git-Url: https://git.cryptolib.org/?p=arm-crypto-lib.git;a=blobdiff_plain;f=bmw%2Fautogen_f1_neon_small.rb;fp=bmw%2Fautogen_f1_neon_small.rb;h=6f6df54f7cebd92bb22f6e3913d6137155d584c2;hp=0000000000000000000000000000000000000000;hb=3a80fbe29e33b818ccebbaba7f8bbe48c5ccd173;hpb=2a4779378a7bf4322a0e6b2024284092135e8a3d diff --git a/bmw/autogen_f1_neon_small.rb b/bmw/autogen_f1_neon_small.rb new file mode 100644 index 0000000..6f6df54 --- /dev/null +++ b/bmw/autogen_f1_neon_small.rb @@ -0,0 +1,213 @@ +# autogen f1 function for BMW +=begin + This file is part of the ARM-Crypto-Lib. + Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +=end + + +header = <. 
+*/ + +static inline +void bmw_small_f1(uint32_t* q, const uint32_t* m, const uint32_t* h){ + uint32_t even, odd; + uint32x4_t qq16, qq20, qq24, qq28; + uint32x4_t qm0, qm1, qm2; + uint32x4_t qk={0x55555550UL, 0x5aaaaaa5UL, 0x5ffffffaUL, 0x6555554fUL}; + uint32x4_t qkadd={0x15555554UL, 0x15555554UL, 0x15555554UL, 0x15555554UL}; + uint32x2_t dtmp0; + uint32x4x2_t q2tmp0, q2tmp1; +EOF + +footer = <>1)|q[%2d]);\n", i+16, (i%2==0)?"even":"odd ", i+14, i+14) + s += sprintf(" qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}),\n" \ + " vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16}));\n") + s += sprintf(" qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}),\n" \ + " vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2}));\n") + s += sprintf(" qm1 = vaddq_u32(qm1, qm0);\n") + s += sprintf(" dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1));\n") + s += sprintf(" q[%2d] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1);\n", i+16) + + return s +end + + +puts header +[16,20,24,28].each {|x| puts gen_addElement(x)} +(0..1).each {|x| puts gen_expand_1(x)} +(2..15).each {|x| puts gen_expand_2(x, 2)} +puts footer \ No newline at end of file