X-Git-Url: https://git.cryptolib.org/?p=arm-crypto-lib.git;a=blobdiff_plain;f=bmw%2Fautogen_f1_neon_small.rb;fp=bmw%2Fautogen_f1_neon_small.rb;h=6f6df54f7cebd92bb22f6e3913d6137155d584c2;hp=0000000000000000000000000000000000000000;hb=3a80fbe29e33b818ccebbaba7f8bbe48c5ccd173;hpb=2a4779378a7bf4322a0e6b2024284092135e8a3d
diff --git a/bmw/autogen_f1_neon_small.rb b/bmw/autogen_f1_neon_small.rb
new file mode 100644
index 0000000..6f6df54
--- /dev/null
+++ b/bmw/autogen_f1_neon_small.rb
@@ -0,0 +1,213 @@
+# autogen f1 function for BMW
+=begin
+ This file is part of the ARM-Crypto-Lib.
+ Copyright (C) 2006-2010 Daniel Otte (daniel.otte@rub.de)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+=end
+
+
+# Prologue text for the generated C output. The visible part opens the
+# definition of bmw_small_f1(q, m, h) and declares its locals: two uint32_t
+# scalars, the NEON vector temporaries, and the constant vectors qk and qkadd.
+# Some of these (even/odd, qm0, qm1, dtmp0, q2tmp0, q2tmp1) are referenced by
+# the statement templates further down in this script.
+# The heredoc ends at the EOF terminator below.
+# NOTE(review): the assignment line below looks damaged by markup stripping.
+# Presumably it read `header = <<EOF` followed by a C comment holding the
+# license notice (ending in "see <http://www.gnu.org/licenses/>."), which the
+# stray "." and the closing "*/" still hint at. Restore it from upstream
+# before running this script.
+header = <.
+*/
+
+static inline
+void bmw_small_f1(uint32_t* q, const uint32_t* m, const uint32_t* h){
+ uint32_t even, odd;
+ uint32x4_t qq16, qq20, qq24, qq28;
+ uint32x4_t qm0, qm1, qm2;
+ uint32x4_t qk={0x55555550UL, 0x5aaaaaa5UL, 0x5ffffffaUL, 0x6555554fUL};
+ uint32x4_t qkadd={0x15555554UL, 0x15555554UL, 0x15555554UL, 0x15555554UL};
+ uint32x2_t dtmp0;
+ uint32x4x2_t q2tmp0, q2tmp1;
+EOF
+
+footer = <>1)|q[%2d]);\n", i+16, (i%2==0)?"even":"odd ", i+14, i+14)
+ s += sprintf(" qm0 = veorq_u32(vshlq_u32(q2tmp0.val[1],(int32x4_t){ 3, 7, 13, 16}),\n" \
+ " vshlq_u32(q2tmp0.val[1],(int32x4_t){-29,-25,-19,-16}));\n")
+ s += sprintf(" qm1 = veorq_u32(vshlq_u32(q2tmp1.val[1],(int32x4_t){ 19, 23, 27, 0}),\n" \
+ " vshlq_u32(q2tmp1.val[1],(int32x4_t){-13, -9, -5, -2}));\n")
+ s += sprintf(" qm1 = vaddq_u32(qm1, qm0);\n")
+ s += sprintf(" dtmp0 = vadd_u32(vget_high_u32(qm1), vget_low_u32(qm1));\n")
+ s += sprintf(" q[%2d] += vget_lane_u32(dtmp0, 0) + vget_lane_u32(dtmp0, 1);\n", i+16)
+
+ return s
+end
+
+
+puts header
+[16,20,24,28].each {|x| puts gen_addElement(x)}
+(0..1).each {|x| puts gen_expand_1(x)}
+(2..15).each {|x| puts gen_expand_2(x, 2)}
+puts footer
\ No newline at end of file