]> git.cryptolib.org Git - arm-crypto-lib.git/commitdiff
small update
authorbg <bg@cypex>
Sun, 11 Jul 2010 16:21:36 +0000 (18:21 +0200)
committerbg <bg@cypex>
Sun, 11 Jul 2010 16:21:36 +0000 (18:21 +0200)
bmw/analyze_f0.rb
bmw/bmw_large_speed.c
bmw/bmw_small_speed.c
bmw/f1_autogen.c [deleted file]
bmw/f1_autogen.i [new file with mode: 0644]
bmw/f1_autogen_large.c [deleted file]
bmw/f1_autogen_large.i [new file with mode: 0644]
skein/skein256.c

index 465e56ee9031ea0ecc6e4325c9a14b13ad4856c8..0add504de8b894b49665000448b10e87d6e789b5 100644 (file)
@@ -118,7 +118,8 @@ def print_reg_map(map, regs, length, indent=0)
       if map[x][r]==nil
         print ' '
       else 
-        print map[x][r]
+        print map[x][r] if map[x][r].class == String
+        print map[x][r].to_s(36) if map[x][r].class == Fixnum
       end
     end
     print "\n"
@@ -195,7 +196,8 @@ def bits_set_simple(x)
 end
 
 def init_bitcount_lut
-  (0..(2**8-1)).each {|x| $bitcount_lut[x] = bits_set_simple(x)}
+  (0..(2**4-1)).each  {|x| $bitcount_lut[x] = bits_set_simple(x)}
+  ((2**4)..(2**8-1)).each  {|x| $bitcount_lut[x] = bits_set(x, 4)}
   ((2**8)..(2**16-1)).each {|x| $bitcount_lut[x] = bits_set(x, 8)}
 end
 
@@ -221,7 +223,7 @@ def decode_word(word)
   return r
 end
 
-def generate_c_code(fout, func, optimizations=[], reg_map=[])
+def generate_c_code(fout, func, optimizations=[], reg_map=[], use_map=[])
   out_interval = 3
   out_modulus = 16
   out_idx = 0
@@ -259,10 +261,10 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
       opt_table[step] << [sign_a, sign_b, sign_out, reg_name, reg_a, reg_b, set, free]
     end
   end
-  puts 'DBG: '+opt_table.inspect
   (0..(func.length-1)).each do |i|
     fout.printf("q[%2d] = ", out_idx)
     out_idx = (out_idx+out_interval)%out_modulus
+    use_map << Array.new
     func[i].each do |j|
       skip = 0
       if opt_table[i]
@@ -271,12 +273,15 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
         end
       end
       fout.printf("%st[%2d] ", j[0].chr, j[1..-1].to_i) if skip==0
+      use_map[-1] << j[1..-1].to_i if skip==0
     end
     if opt_table[i]
       opt_table[i].each do |opt|
         fout.print(opt[2]+'('+opt[3])
         if opt[6]
           fout.printf('=%st[%2d]%st[%2d]',opt[0],opt[4].to_i,opt[1],opt[5].to_i)
+          use_map[-1] << opt[4].to_i
+          use_map[-1] << opt[5].to_i
         end
         fout.print(') ')
       end
@@ -285,6 +290,67 @@ def generate_c_code(fout, func, optimizations=[], reg_map=[])
   end
 end
 
+class Array
+  def find_max_index
+    return nil if self.length==0
+    maxidx=0
+    max=self[0]
+    self.each do |i|
+      if(self[i]!=nil && max<self[i])
+        maxidx = i
+        max = self[i]
+      end
+    end
+    return maxidx
+  end
+
+end
+
+def calculate_load_pressure(use_map, use_locations, regs, steps)
+  loads=0
+  reg_map = Array.new(steps)
+  (0..(reg_map.length-1)).each{|i| reg_map[i]=Array.new(regs)}
+  (0..(steps-1)).each do |step|
+    use_locations.each do |e|
+      e.pop if e[-1] && e[-1]<step
+    end
+    local_use_map = Array.new(regs)
+    reg_map[step] = reg_map[step-1].clone if step>0
+    #(0..(regs-1)).each {|i| reg_map[step][i] = reg_map[step-1][i]}
+    use_map[step].each do |entry|
+#      print 'DBG: step='+step.to_s+' entry='+entry.to_s
+      found = reg_map[step].find_index(entry)
+      if found!=nil
+#        print ' (direct)'
+        reg_map[step][found] = entry
+        local_use_map[found] = 1
+      else 
+        loads += 1
+        if t0=reg_map[step].find_index(nil)
+#          print ' (found unsused slot)'
+          reg_map[step][t0] = entry
+          local_use_map[t0] = 1
+        else
+          # find a register to clear
+          a = reg_map[step].collect {|e| use_locations[e][-1]}
+          if t1 = a.find_index(nil)
+#            print ' (found not further used slot)'
+            reg_map[step][t1] = entry
+          else
+#            print ' (reassigned slot)'
+            reg_map[step][a.find_max_index] = entry
+          end
+        end
+      end
+#      print "\n"
+    end
+#    puts 'DBG: map part ('+step.to_s+'): '+reg_map[step].inspect
+  end
+  return loads, reg_map
+end
+
+################################################################################
+
 (0..15).each do |i|
   (0..3). each do |j|
     ((j+1)..4).each do |k|
@@ -355,25 +421,31 @@ puts "initializing bitcount table..."
 init_bitcount_lut
 
 puts "collision free combinations:"
-max = 0
-combinations = Array.new
-percent = 0
-percent_step =(2**dublets.length-1)/10000.0
-next_step = (2**dublets.length-1)
-puts ''
-(2**dublets.length-1).downto(0) do |x|
-  if(x<=next_step)
-    print "\x1b[s "+sprintf("%5.2f%%", percent/100.0)+"\x1b[u"
-    percent += 1
-    next_step -= percent_step
-  end
-  if check_collision(x, collision_lut) == false
-    if bits_set(x)>= max
-      combinations = Array.new if bits_set(x)>max
-      combinations << x
-      max = bits_set(x)
+puts "(from cache)"
+combinations = [354997, 94005, 93877]
+if combinations==nil
+  max = 0
+  combinations = Array.new
+  percent = 0
+  percent_step =(2**dublets.length-1)/10000.0
+  next_step = (2**dublets.length-1)
+  puts ''
+  (2**dublets.length-1).downto(0) do |x|
+    if(x<=next_step)
+      print "\x1b[s "+sprintf("%5.2f%%", percent/100.0)+"\x1b[u"
+      percent += 1
+      next_step -= percent_step
+    end
+    if check_collision(x, collision_lut) == false
+      if bits_set(x)>= max
+        combinations = Array.new if bits_set(x)>max
+        combinations << x
+        max = bits_set(x)
+      end
     end
   end
+  
+  puts 'DBG: combinations: '+combinations.inspect
 end
 
 combinations.each do |c|
@@ -383,4 +455,33 @@ combinations.each do |c|
 end
 steps = word_to_steps(combinations[-1], dublets)
 regs, reg_map = reg_map(steps, f0_def.length)
-generate_c_code(STDOUT, f0_def,steps, reg_map)
+use_map = []
+generate_c_code(STDOUT, f0_def,steps, reg_map, use_map)
+puts 'DBG: '
+use_map.each do |q|
+  print "\t[ "
+  print q.collect {|v| v.to_s(16)}.join(', ')
+  print " ]\n"
+end
+reg_use_locations = Array.new(f0_def.length)
+(0..(reg_use_locations.length-1)).each{|x| reg_use_locations[x] = Array.new}
+
+(0..(f0_def.length-1)).each do |i|
+  use_map[i].each do |x|
+    reg_use_locations[x]  << i
+  end
+end
+
+reg_use_locations.each{|x| x.reverse!}
+#puts 'DBG: '+reg_use_locations.inspect
+#puts 'DBG: (16 regs) '+calculate_load_pressure(use_map, reg_use_locations, 16, 16).inspect
+#puts 'DBG: ( 8 regs) '+calculate_load_pressure(use_map, reg_use_locations,  8, 16).inspect
+(4..16).each do |regs|
+  p,m = calculate_load_pressure(use_map, reg_use_locations, regs, 16)
+  puts "=#{regs} registers="
+  puts "  load pressure: " +p.to_s
+  puts "  map: "
+  print_reg_map(m, regs, 16, 4)
+#  puts "DBG: reg_map: "+m.inspect
+#  puts "DBG: use_map: "+use_map.inspect
+end
index 583b6f39186ee5c5276f2deb61d40384114c447b..b5ae2b48fe6735319663e8653f9a14cb356e6424 100644 (file)
 #define R64_6(x)    (ROTR64((x), 21))
 #define R64_7(x)    (ROTR64((x), 11))
 
-#include "f1_autogen_large.c"
+#include "f1_autogen_large.i"
 
 static inline
 void bmw_large_f0(uint64_t* q, uint64_t* h, const uint64_t* m){
index f1c5ed4274868fa5855c29cbd618612356895476..d1d70e068a4f2a0eeb2bd8bdc0f6885698f01fe1 100644 (file)
 #define R32_7(x)   (ROTR32((x),  5))
 
 
-#include "f1_autogen.c"
+#include "f1_autogen.i"
 
 static inline
 void bmw_small_f0(uint32_t* q, uint32_t* h, const uint32_t* m){
@@ -150,39 +150,39 @@ void bmw_small_f0(uint32_t* q, uint32_t* h, const uint32_t* m){
        q[13] = (t[ 2] + t[ 4] + t[ 7] + t[10] + t[11]);
        */
        q[ 0] = +t[ 5] +t[10] +t[13] +(tr1=-t[ 7]+t[14]) ;
+       q[ 0] = S32_0(q[ 0]) + h[ 1];
        q[ 3] = +t[ 8] +t[13] +t[ 0] +(tr2=-t[ 1]-t[10]) ;
+       q[ 3] = S32_3(q[ 3]) + h[ 4];
        q[ 6] = -t[11] +t[13] -t[ 0] -t[ 3] +t[ 4] ;
+       q[ 6] = S32_1(q[ 6]) + h[ 7];
        q[ 9] = +t[ 0] +(tr0=-t[ 3]+t[ 6]) +(tr1) ;
+       q[ 9] = S32_4(q[ 9]) + h[10];
        q[12] = -t[ 9] -(tr0) -(tr2) ;
+       q[12] = S32_2(q[12]) + h[13];
        q[15] = -t[ 4] +(tr0=-t[ 9]+t[12]) +(tr1=-t[ 6]+t[13]) ;
+       q[15] = S32_0(q[15]) + h[ 0];
        q[ 2] = +t[ 7] +t[15] +t[ 0] -(tr0) ;
+       q[ 2] = S32_2(q[ 2]) + h[ 3];
        q[ 5] = +t[10] +(tr0=-t[ 2]+t[15]) +(tr2=+t[ 3]-t[12]) ;
+       q[ 5] = S32_0(q[ 5]) + h[ 6];
        q[ 8] = -t[ 5] -(tr0) +(tr1) ;
+       q[ 8] = S32_3(q[ 8]) + h[ 9];
        q[11] = -t[ 0] -t[ 2] +t[ 9] +(tr0=-t[ 5]+t[ 8]) ;
+       q[11] = S32_1(q[11]) + h[12];
        q[14] = -t[11] +(tr0) +(tr2) ;
+       q[14] = S32_4(q[14]) + h[15];
        q[ 1] = +t[ 6] +(tr0=+t[11]+t[14]) +(tr1=-t[ 8]-t[15]) ;
+       q[ 1] = S32_1(q[ 1]) + h[ 2];
        q[ 4] = +t[ 9] +t[ 1] +t[ 2] -(tr0) ;
+       q[ 4] = S32_4(q[ 4]) + h[ 5];
        q[ 7] = -t[12] -t[14] +t[ 1] -t[ 4] -t[ 5] ;
+       q[ 7] = S32_2(q[ 7]) + h[ 8];
        q[10] = -t[ 1] +(tr0=-t[ 4]-t[ 7]) -(tr1) ;
+       q[10] = S32_0(q[10]) + h[11];
        q[13] = +t[ 2] +t[10] +t[11] -(tr0) ;
+       q[13] = S32_3(q[13]) + h[14];
 
        dump_x(q, 16, 'W');
-       q[ 0] = S32_0(q[ 0]) + h[ 1];
-       q[ 1] = S32_1(q[ 1]) + h[ 2];
-       q[ 2] = S32_2(q[ 2]) + h[ 3];
-       q[ 3] = S32_3(q[ 3]) + h[ 4];
-       q[ 4] = S32_4(q[ 4]) + h[ 5];
-       q[ 5] = S32_0(q[ 5]) + h[ 6];
-       q[ 6] = S32_1(q[ 6]) + h[ 7];
-       q[ 7] = S32_2(q[ 7]) + h[ 8];
-       q[ 8] = S32_3(q[ 8]) + h[ 9];
-       q[ 9] = S32_4(q[ 9]) + h[10];
-       q[10] = S32_0(q[10]) + h[11];
-       q[11] = S32_1(q[11]) + h[12];
-       q[12] = S32_2(q[12]) + h[13];
-       q[13] = S32_3(q[13]) + h[14];
-       q[14] = S32_4(q[14]) + h[15];
-       q[15] = S32_0(q[15]) + h[ 0];
 }
 
 static inline
diff --git a/bmw/f1_autogen.c b/bmw/f1_autogen.c
deleted file mode 100644 (file)
index 31217b7..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-/* BEGIN of automatic generated code */
-
-static inline
-void bmw_small_f1(uint32_t* q, const void* m, const void* h){ 
-/* expand_1( 0) */
-       q[16] = 
-               ((  ROTL32(((uint32_t*)m)[ 0], 1) 
-                 + ROTL32(((uint32_t*)m)[ 3], 4) 
-                 - ROTL32(((uint32_t*)m)[10], 11) 
-                 + 0x55555550UL 
-                )^ ((uint32_t*)h)[ 7] 
-               )
-               + S32_1(q[ 0]) + S32_2(q[ 1]) + S32_3(q[ 2]) + S32_0(q[ 3])
-               + S32_1(q[ 4]) + S32_2(q[ 5]) + S32_3(q[ 6]) + S32_0(q[ 7])
-               + S32_1(q[ 8]) + S32_2(q[ 9]) + S32_3(q[10]) + S32_0(q[11])
-               + S32_1(q[12]) + S32_2(q[13]) + S32_3(q[14]) + S32_0(q[15]);
-/* expand_1( 1) */
-       q[17] = 
-               ((  ROTL32(((uint32_t*)m)[ 1], 2) 
-                 + ROTL32(((uint32_t*)m)[ 4], 5) 
-                 - ROTL32(((uint32_t*)m)[11], 12) 
-                 + 0x5aaaaaa5UL 
-                )^ ((uint32_t*)h)[ 8] 
-               )
-               + S32_1(q[ 1]) + S32_2(q[ 2]) + S32_3(q[ 3]) + S32_0(q[ 4])
-               + S32_1(q[ 5]) + S32_2(q[ 6]) + S32_3(q[ 7]) + S32_0(q[ 8])
-               + S32_1(q[ 9]) + S32_2(q[10]) + S32_3(q[11]) + S32_0(q[12])
-               + S32_1(q[13]) + S32_2(q[14]) + S32_3(q[15]) + S32_0(q[16]);
-/* expand_2( 2) */
-       q[18] = 
-               ((  ROTL32(((uint32_t*)m)[ 2], 3) 
-                 + ROTL32(((uint32_t*)m)[ 5], 6) 
-                 - ROTL32(((uint32_t*)m)[12], 13) 
-                 + 0x5ffffffaUL 
-                )^ ((uint32_t*)h)[ 9] 
-               )
-               +       q[ 2]  + R32_1(q[ 3]) +       q[ 4]  + R32_2(q[ 5])
-               +       q[ 6]  + R32_3(q[ 7]) +       q[ 8]  + R32_4(q[ 9])
-               +       q[10]  + R32_5(q[11]) +       q[12]  + R32_6(q[13])
-               +       q[14]  + R32_7(q[15]) + S32_4(q[16]) + S32_5(q[17]);
-/* expand_2( 3) */
-       q[19] = 
-               ((  ROTL32(((uint32_t*)m)[ 3], 4) 
-                 + ROTL32(((uint32_t*)m)[ 6], 7) 
-                 - ROTL32(((uint32_t*)m)[13], 14) 
-                 + 0x6555554fUL 
-                )^ ((uint32_t*)h)[10] 
-               )
-               +       q[ 3]  + R32_1(q[ 4]) +       q[ 5]  + R32_2(q[ 6])
-               +       q[ 7]  + R32_3(q[ 8]) +       q[ 9]  + R32_4(q[10])
-               +       q[11]  + R32_5(q[12]) +       q[13]  + R32_6(q[14])
-               +       q[15]  + R32_7(q[16]) + S32_4(q[17]) + S32_5(q[18]);
-/* expand_2( 4) */
-       q[20] = 
-               ((  ROTL32(((uint32_t*)m)[ 4], 5) 
-                 + ROTL32(((uint32_t*)m)[ 7], 8) 
-                 - ROTL32(((uint32_t*)m)[14], 15) 
-                 + 0x6aaaaaa4UL 
-                )^ ((uint32_t*)h)[11] 
-               )
-               +       q[ 4]  + R32_1(q[ 5]) +       q[ 6]  + R32_2(q[ 7])
-               +       q[ 8]  + R32_3(q[ 9]) +       q[10]  + R32_4(q[11])
-               +       q[12]  + R32_5(q[13]) +       q[14]  + R32_6(q[15])
-               +       q[16]  + R32_7(q[17]) + S32_4(q[18]) + S32_5(q[19]);
-/* expand_2( 5) */
-       q[21] = 
-               ((  ROTL32(((uint32_t*)m)[ 5], 6) 
-                 + ROTL32(((uint32_t*)m)[ 8], 9) 
-                 - ROTL32(((uint32_t*)m)[15], 16) 
-                 + 0x6ffffff9UL 
-                )^ ((uint32_t*)h)[12] 
-               )
-               +       q[ 5]  + R32_1(q[ 6]) +       q[ 7]  + R32_2(q[ 8])
-               +       q[ 9]  + R32_3(q[10]) +       q[11]  + R32_4(q[12])
-               +       q[13]  + R32_5(q[14]) +       q[15]  + R32_6(q[16])
-               +       q[17]  + R32_7(q[18]) + S32_4(q[19]) + S32_5(q[20]);
-/* expand_2( 6) */
-       q[22] = 
-               ((  ROTL32(((uint32_t*)m)[ 6], 7) 
-                 + ROTL32(((uint32_t*)m)[ 9], 10) 
-                 - ROTL32(((uint32_t*)m)[ 0], 1) 
-                 + 0x7555554eUL 
-                )^ ((uint32_t*)h)[13] 
-               )
-               +       q[ 6]  + R32_1(q[ 7]) +       q[ 8]  + R32_2(q[ 9])
-               +       q[10]  + R32_3(q[11]) +       q[12]  + R32_4(q[13])
-               +       q[14]  + R32_5(q[15]) +       q[16]  + R32_6(q[17])
-               +       q[18]  + R32_7(q[19]) + S32_4(q[20]) + S32_5(q[21]);
-/* expand_2( 7) */
-       q[23] = 
-               ((  ROTL32(((uint32_t*)m)[ 7], 8) 
-                 + ROTL32(((uint32_t*)m)[10], 11) 
-                 - ROTL32(((uint32_t*)m)[ 1], 2) 
-                 + 0x7aaaaaa3UL 
-                )^ ((uint32_t*)h)[14] 
-               )
-               +       q[ 7]  + R32_1(q[ 8]) +       q[ 9]  + R32_2(q[10])
-               +       q[11]  + R32_3(q[12]) +       q[13]  + R32_4(q[14])
-               +       q[15]  + R32_5(q[16]) +       q[17]  + R32_6(q[18])
-               +       q[19]  + R32_7(q[20]) + S32_4(q[21]) + S32_5(q[22]);
-/* expand_2( 8) */
-       q[24] = 
-               ((  ROTL32(((uint32_t*)m)[ 8], 9) 
-                 + ROTL32(((uint32_t*)m)[11], 12) 
-                 - ROTL32(((uint32_t*)m)[ 2], 3) 
-                 + 0x7ffffff8UL 
-                )^ ((uint32_t*)h)[15] 
-               )
-               +       q[ 8]  + R32_1(q[ 9]) +       q[10]  + R32_2(q[11])
-               +       q[12]  + R32_3(q[13]) +       q[14]  + R32_4(q[15])
-               +       q[16]  + R32_5(q[17]) +       q[18]  + R32_6(q[19])
-               +       q[20]  + R32_7(q[21]) + S32_4(q[22]) + S32_5(q[23]);
-/* expand_2( 9) */
-       q[25] = 
-               ((  ROTL32(((uint32_t*)m)[ 9], 10) 
-                 + ROTL32(((uint32_t*)m)[12], 13) 
-                 - ROTL32(((uint32_t*)m)[ 3], 4) 
-                 + 0x8555554dUL 
-                )^ ((uint32_t*)h)[ 0] 
-               )
-               +       q[ 9]  + R32_1(q[10]) +       q[11]  + R32_2(q[12])
-               +       q[13]  + R32_3(q[14]) +       q[15]  + R32_4(q[16])
-               +       q[17]  + R32_5(q[18]) +       q[19]  + R32_6(q[20])
-               +       q[21]  + R32_7(q[22]) + S32_4(q[23]) + S32_5(q[24]);
-/* expand_2(10) */
-       q[26] = 
-               ((  ROTL32(((uint32_t*)m)[10], 11) 
-                 + ROTL32(((uint32_t*)m)[13], 14) 
-                 - ROTL32(((uint32_t*)m)[ 4], 5) 
-                 + 0x8aaaaaa2UL 
-                )^ ((uint32_t*)h)[ 1] 
-               )
-               +       q[10]  + R32_1(q[11]) +       q[12]  + R32_2(q[13])
-               +       q[14]  + R32_3(q[15]) +       q[16]  + R32_4(q[17])
-               +       q[18]  + R32_5(q[19]) +       q[20]  + R32_6(q[21])
-               +       q[22]  + R32_7(q[23]) + S32_4(q[24]) + S32_5(q[25]);
-/* expand_2(11) */
-       q[27] = 
-               ((  ROTL32(((uint32_t*)m)[11], 12) 
-                 + ROTL32(((uint32_t*)m)[14], 15) 
-                 - ROTL32(((uint32_t*)m)[ 5], 6) 
-                 + 0x8ffffff7UL 
-                )^ ((uint32_t*)h)[ 2] 
-               )
-               +       q[11]  + R32_1(q[12]) +       q[13]  + R32_2(q[14])
-               +       q[15]  + R32_3(q[16]) +       q[17]  + R32_4(q[18])
-               +       q[19]  + R32_5(q[20]) +       q[21]  + R32_6(q[22])
-               +       q[23]  + R32_7(q[24]) + S32_4(q[25]) + S32_5(q[26]);
-/* expand_2(12) */
-       q[28] = 
-               ((  ROTL32(((uint32_t*)m)[12], 13) 
-                 + ROTL32(((uint32_t*)m)[15], 16) 
-                 - ROTL32(((uint32_t*)m)[ 6], 7) 
-                 + 0x9555554cUL 
-                )^ ((uint32_t*)h)[ 3] 
-               )
-               +       q[12]  + R32_1(q[13]) +       q[14]  + R32_2(q[15])
-               +       q[16]  + R32_3(q[17]) +       q[18]  + R32_4(q[19])
-               +       q[20]  + R32_5(q[21]) +       q[22]  + R32_6(q[23])
-               +       q[24]  + R32_7(q[25]) + S32_4(q[26]) + S32_5(q[27]);
-/* expand_2(13) */
-       q[29] = 
-               ((  ROTL32(((uint32_t*)m)[13], 14) 
-                 + ROTL32(((uint32_t*)m)[ 0], 1) 
-                 - ROTL32(((uint32_t*)m)[ 7], 8) 
-                 + 0x9aaaaaa1UL 
-                )^ ((uint32_t*)h)[ 4] 
-               )
-               +       q[13]  + R32_1(q[14]) +       q[15]  + R32_2(q[16])
-               +       q[17]  + R32_3(q[18]) +       q[19]  + R32_4(q[20])
-               +       q[21]  + R32_5(q[22]) +       q[23]  + R32_6(q[24])
-               +       q[25]  + R32_7(q[26]) + S32_4(q[27]) + S32_5(q[28]);
-/* expand_2(14) */
-       q[30] = 
-               ((  ROTL32(((uint32_t*)m)[14], 15) 
-                 + ROTL32(((uint32_t*)m)[ 1], 2) 
-                 - ROTL32(((uint32_t*)m)[ 8], 9) 
-                 + 0x9ffffff6UL 
-                )^ ((uint32_t*)h)[ 5] 
-               )
-               +       q[14]  + R32_1(q[15]) +       q[16]  + R32_2(q[17])
-               +       q[18]  + R32_3(q[19]) +       q[20]  + R32_4(q[21])
-               +       q[22]  + R32_5(q[23]) +       q[24]  + R32_6(q[25])
-               +       q[26]  + R32_7(q[27]) + S32_4(q[28]) + S32_5(q[29]);
-/* expand_2(15) */
-       q[31] = 
-               ((  ROTL32(((uint32_t*)m)[15], 16) 
-                 + ROTL32(((uint32_t*)m)[ 2], 3) 
-                 - ROTL32(((uint32_t*)m)[ 9], 10) 
-                 + 0xa555554bUL 
-                )^ ((uint32_t*)h)[ 6] 
-               )
-               +       q[15]  + R32_1(q[16]) +       q[17]  + R32_2(q[18])
-               +       q[19]  + R32_3(q[20]) +       q[21]  + R32_4(q[22])
-               +       q[23]  + R32_5(q[24]) +       q[25]  + R32_6(q[26])
-               +       q[27]  + R32_7(q[28]) + S32_4(q[29]) + S32_5(q[30]);
-}
-
-/* END of automatic generated code */
-
diff --git a/bmw/f1_autogen.i b/bmw/f1_autogen.i
new file mode 100644 (file)
index 0000000..31217b7
--- /dev/null
@@ -0,0 +1,200 @@
+/* BEGIN of automatic generated code */
+
+static inline
+void bmw_small_f1(uint32_t* q, const void* m, const void* h){ 
+/* expand_1( 0) */
+       q[16] = 
+               ((  ROTL32(((uint32_t*)m)[ 0], 1) 
+                 + ROTL32(((uint32_t*)m)[ 3], 4) 
+                 - ROTL32(((uint32_t*)m)[10], 11) 
+                 + 0x55555550UL 
+                )^ ((uint32_t*)h)[ 7] 
+               )
+               + S32_1(q[ 0]) + S32_2(q[ 1]) + S32_3(q[ 2]) + S32_0(q[ 3])
+               + S32_1(q[ 4]) + S32_2(q[ 5]) + S32_3(q[ 6]) + S32_0(q[ 7])
+               + S32_1(q[ 8]) + S32_2(q[ 9]) + S32_3(q[10]) + S32_0(q[11])
+               + S32_1(q[12]) + S32_2(q[13]) + S32_3(q[14]) + S32_0(q[15]);
+/* expand_1( 1) */
+       q[17] = 
+               ((  ROTL32(((uint32_t*)m)[ 1], 2) 
+                 + ROTL32(((uint32_t*)m)[ 4], 5) 
+                 - ROTL32(((uint32_t*)m)[11], 12) 
+                 + 0x5aaaaaa5UL 
+                )^ ((uint32_t*)h)[ 8] 
+               )
+               + S32_1(q[ 1]) + S32_2(q[ 2]) + S32_3(q[ 3]) + S32_0(q[ 4])
+               + S32_1(q[ 5]) + S32_2(q[ 6]) + S32_3(q[ 7]) + S32_0(q[ 8])
+               + S32_1(q[ 9]) + S32_2(q[10]) + S32_3(q[11]) + S32_0(q[12])
+               + S32_1(q[13]) + S32_2(q[14]) + S32_3(q[15]) + S32_0(q[16]);
+/* expand_2( 2) */
+       q[18] = 
+               ((  ROTL32(((uint32_t*)m)[ 2], 3) 
+                 + ROTL32(((uint32_t*)m)[ 5], 6) 
+                 - ROTL32(((uint32_t*)m)[12], 13) 
+                 + 0x5ffffffaUL 
+                )^ ((uint32_t*)h)[ 9] 
+               )
+               +       q[ 2]  + R32_1(q[ 3]) +       q[ 4]  + R32_2(q[ 5])
+               +       q[ 6]  + R32_3(q[ 7]) +       q[ 8]  + R32_4(q[ 9])
+               +       q[10]  + R32_5(q[11]) +       q[12]  + R32_6(q[13])
+               +       q[14]  + R32_7(q[15]) + S32_4(q[16]) + S32_5(q[17]);
+/* expand_2( 3) */
+       q[19] = 
+               ((  ROTL32(((uint32_t*)m)[ 3], 4) 
+                 + ROTL32(((uint32_t*)m)[ 6], 7) 
+                 - ROTL32(((uint32_t*)m)[13], 14) 
+                 + 0x6555554fUL 
+                )^ ((uint32_t*)h)[10] 
+               )
+               +       q[ 3]  + R32_1(q[ 4]) +       q[ 5]  + R32_2(q[ 6])
+               +       q[ 7]  + R32_3(q[ 8]) +       q[ 9]  + R32_4(q[10])
+               +       q[11]  + R32_5(q[12]) +       q[13]  + R32_6(q[14])
+               +       q[15]  + R32_7(q[16]) + S32_4(q[17]) + S32_5(q[18]);
+/* expand_2( 4) */
+       q[20] = 
+               ((  ROTL32(((uint32_t*)m)[ 4], 5) 
+                 + ROTL32(((uint32_t*)m)[ 7], 8) 
+                 - ROTL32(((uint32_t*)m)[14], 15) 
+                 + 0x6aaaaaa4UL 
+                )^ ((uint32_t*)h)[11] 
+               )
+               +       q[ 4]  + R32_1(q[ 5]) +       q[ 6]  + R32_2(q[ 7])
+               +       q[ 8]  + R32_3(q[ 9]) +       q[10]  + R32_4(q[11])
+               +       q[12]  + R32_5(q[13]) +       q[14]  + R32_6(q[15])
+               +       q[16]  + R32_7(q[17]) + S32_4(q[18]) + S32_5(q[19]);
+/* expand_2( 5) */
+       q[21] = 
+               ((  ROTL32(((uint32_t*)m)[ 5], 6) 
+                 + ROTL32(((uint32_t*)m)[ 8], 9) 
+                 - ROTL32(((uint32_t*)m)[15], 16) 
+                 + 0x6ffffff9UL 
+                )^ ((uint32_t*)h)[12] 
+               )
+               +       q[ 5]  + R32_1(q[ 6]) +       q[ 7]  + R32_2(q[ 8])
+               +       q[ 9]  + R32_3(q[10]) +       q[11]  + R32_4(q[12])
+               +       q[13]  + R32_5(q[14]) +       q[15]  + R32_6(q[16])
+               +       q[17]  + R32_7(q[18]) + S32_4(q[19]) + S32_5(q[20]);
+/* expand_2( 6) */
+       q[22] = 
+               ((  ROTL32(((uint32_t*)m)[ 6], 7) 
+                 + ROTL32(((uint32_t*)m)[ 9], 10) 
+                 - ROTL32(((uint32_t*)m)[ 0], 1) 
+                 + 0x7555554eUL 
+                )^ ((uint32_t*)h)[13] 
+               )
+               +       q[ 6]  + R32_1(q[ 7]) +       q[ 8]  + R32_2(q[ 9])
+               +       q[10]  + R32_3(q[11]) +       q[12]  + R32_4(q[13])
+               +       q[14]  + R32_5(q[15]) +       q[16]  + R32_6(q[17])
+               +       q[18]  + R32_7(q[19]) + S32_4(q[20]) + S32_5(q[21]);
+/* expand_2( 7) */
+       q[23] = 
+               ((  ROTL32(((uint32_t*)m)[ 7], 8) 
+                 + ROTL32(((uint32_t*)m)[10], 11) 
+                 - ROTL32(((uint32_t*)m)[ 1], 2) 
+                 + 0x7aaaaaa3UL 
+                )^ ((uint32_t*)h)[14] 
+               )
+               +       q[ 7]  + R32_1(q[ 8]) +       q[ 9]  + R32_2(q[10])
+               +       q[11]  + R32_3(q[12]) +       q[13]  + R32_4(q[14])
+               +       q[15]  + R32_5(q[16]) +       q[17]  + R32_6(q[18])
+               +       q[19]  + R32_7(q[20]) + S32_4(q[21]) + S32_5(q[22]);
+/* expand_2( 8) */
+       q[24] = 
+               ((  ROTL32(((uint32_t*)m)[ 8], 9) 
+                 + ROTL32(((uint32_t*)m)[11], 12) 
+                 - ROTL32(((uint32_t*)m)[ 2], 3) 
+                 + 0x7ffffff8UL 
+                )^ ((uint32_t*)h)[15] 
+               )
+               +       q[ 8]  + R32_1(q[ 9]) +       q[10]  + R32_2(q[11])
+               +       q[12]  + R32_3(q[13]) +       q[14]  + R32_4(q[15])
+               +       q[16]  + R32_5(q[17]) +       q[18]  + R32_6(q[19])
+               +       q[20]  + R32_7(q[21]) + S32_4(q[22]) + S32_5(q[23]);
+/* expand_2( 9) */
+       q[25] = 
+               ((  ROTL32(((uint32_t*)m)[ 9], 10) 
+                 + ROTL32(((uint32_t*)m)[12], 13) 
+                 - ROTL32(((uint32_t*)m)[ 3], 4) 
+                 + 0x8555554dUL 
+                )^ ((uint32_t*)h)[ 0] 
+               )
+               +       q[ 9]  + R32_1(q[10]) +       q[11]  + R32_2(q[12])
+               +       q[13]  + R32_3(q[14]) +       q[15]  + R32_4(q[16])
+               +       q[17]  + R32_5(q[18]) +       q[19]  + R32_6(q[20])
+               +       q[21]  + R32_7(q[22]) + S32_4(q[23]) + S32_5(q[24]);
+/* expand_2(10) */
+       q[26] = 
+               ((  ROTL32(((uint32_t*)m)[10], 11) 
+                 + ROTL32(((uint32_t*)m)[13], 14) 
+                 - ROTL32(((uint32_t*)m)[ 4], 5) 
+                 + 0x8aaaaaa2UL 
+                )^ ((uint32_t*)h)[ 1] 
+               )
+               +       q[10]  + R32_1(q[11]) +       q[12]  + R32_2(q[13])
+               +       q[14]  + R32_3(q[15]) +       q[16]  + R32_4(q[17])
+               +       q[18]  + R32_5(q[19]) +       q[20]  + R32_6(q[21])
+               +       q[22]  + R32_7(q[23]) + S32_4(q[24]) + S32_5(q[25]);
+/* expand_2(11) */
+       q[27] = 
+               ((  ROTL32(((uint32_t*)m)[11], 12) 
+                 + ROTL32(((uint32_t*)m)[14], 15) 
+                 - ROTL32(((uint32_t*)m)[ 5], 6) 
+                 + 0x8ffffff7UL 
+                )^ ((uint32_t*)h)[ 2] 
+               )
+               +       q[11]  + R32_1(q[12]) +       q[13]  + R32_2(q[14])
+               +       q[15]  + R32_3(q[16]) +       q[17]  + R32_4(q[18])
+               +       q[19]  + R32_5(q[20]) +       q[21]  + R32_6(q[22])
+               +       q[23]  + R32_7(q[24]) + S32_4(q[25]) + S32_5(q[26]);
+/* expand_2(12) */
+       q[28] = 
+               ((  ROTL32(((uint32_t*)m)[12], 13) 
+                 + ROTL32(((uint32_t*)m)[15], 16) 
+                 - ROTL32(((uint32_t*)m)[ 6], 7) 
+                 + 0x9555554cUL 
+                )^ ((uint32_t*)h)[ 3] 
+               )
+               +       q[12]  + R32_1(q[13]) +       q[14]  + R32_2(q[15])
+               +       q[16]  + R32_3(q[17]) +       q[18]  + R32_4(q[19])
+               +       q[20]  + R32_5(q[21]) +       q[22]  + R32_6(q[23])
+               +       q[24]  + R32_7(q[25]) + S32_4(q[26]) + S32_5(q[27]);
+/* expand_2(13) */
+       q[29] = 
+               ((  ROTL32(((uint32_t*)m)[13], 14) 
+                 + ROTL32(((uint32_t*)m)[ 0], 1) 
+                 - ROTL32(((uint32_t*)m)[ 7], 8) 
+                 + 0x9aaaaaa1UL 
+                )^ ((uint32_t*)h)[ 4] 
+               )
+               +       q[13]  + R32_1(q[14]) +       q[15]  + R32_2(q[16])
+               +       q[17]  + R32_3(q[18]) +       q[19]  + R32_4(q[20])
+               +       q[21]  + R32_5(q[22]) +       q[23]  + R32_6(q[24])
+               +       q[25]  + R32_7(q[26]) + S32_4(q[27]) + S32_5(q[28]);
+/* expand_2(14) */
+       q[30] = 
+               ((  ROTL32(((uint32_t*)m)[14], 15) 
+                 + ROTL32(((uint32_t*)m)[ 1], 2) 
+                 - ROTL32(((uint32_t*)m)[ 8], 9) 
+                 + 0x9ffffff6UL 
+                )^ ((uint32_t*)h)[ 5] 
+               )
+               +       q[14]  + R32_1(q[15]) +       q[16]  + R32_2(q[17])
+               +       q[18]  + R32_3(q[19]) +       q[20]  + R32_4(q[21])
+               +       q[22]  + R32_5(q[23]) +       q[24]  + R32_6(q[25])
+               +       q[26]  + R32_7(q[27]) + S32_4(q[28]) + S32_5(q[29]);
+/* expand_2(15) */
+       q[31] = 
+               ((  ROTL32(((uint32_t*)m)[15], 16) 
+                 + ROTL32(((uint32_t*)m)[ 2], 3) 
+                 - ROTL32(((uint32_t*)m)[ 9], 10) 
+                 + 0xa555554bUL 
+                )^ ((uint32_t*)h)[ 6] 
+               )
+               +       q[15]  + R32_1(q[16]) +       q[17]  + R32_2(q[18])
+               +       q[19]  + R32_3(q[20]) +       q[21]  + R32_4(q[22])
+               +       q[23]  + R32_5(q[24]) +       q[25]  + R32_6(q[26])
+               +       q[27]  + R32_7(q[28]) + S32_4(q[29]) + S32_5(q[30]);
+}
+
+/* END of automatic generated code */
+
diff --git a/bmw/f1_autogen_large.c b/bmw/f1_autogen_large.c
deleted file mode 100644 (file)
index 75eff07..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-/* BEGIN of automatic generated code */
-
-static inline
-void bmw_large_f1(uint64_t* q, const void* m, const void* h){ 
-/* expand_1( 0) */
-       q[16] = 
-               ((  ROTL64(((uint64_t*)m)[ 0], 1) 
-                 + ROTL64(((uint64_t*)m)[ 3], 4) 
-                 - ROTL64(((uint64_t*)m)[10], 11) 
-                 + 0x5555555555555550ULL 
-                )^ ((uint64_t*)h)[ 7] 
-               )
-               + S64_1(q[ 0]) + S64_2(q[ 1]) + S64_3(q[ 2]) + S64_0(q[ 3])
-               + S64_1(q[ 4]) + S64_2(q[ 5]) + S64_3(q[ 6]) + S64_0(q[ 7])
-               + S64_1(q[ 8]) + S64_2(q[ 9]) + S64_3(q[10]) + S64_0(q[11])
-               + S64_1(q[12]) + S64_2(q[13]) + S64_3(q[14]) + S64_0(q[15]);
-/* expand_1( 1) */
-       q[17] = 
-               ((  ROTL64(((uint64_t*)m)[ 1], 2) 
-                 + ROTL64(((uint64_t*)m)[ 4], 5) 
-                 - ROTL64(((uint64_t*)m)[11], 12) 
-                 + 0x5aaaaaaaaaaaaaa5ULL 
-                )^ ((uint64_t*)h)[ 8] 
-               )
-               + S64_1(q[ 1]) + S64_2(q[ 2]) + S64_3(q[ 3]) + S64_0(q[ 4])
-               + S64_1(q[ 5]) + S64_2(q[ 6]) + S64_3(q[ 7]) + S64_0(q[ 8])
-               + S64_1(q[ 9]) + S64_2(q[10]) + S64_3(q[11]) + S64_0(q[12])
-               + S64_1(q[13]) + S64_2(q[14]) + S64_3(q[15]) + S64_0(q[16]);
-/* expand_2( 2) */
-       q[18] = 
-               ((  ROTL64(((uint64_t*)m)[ 2], 3) 
-                 + ROTL64(((uint64_t*)m)[ 5], 6) 
-                 - ROTL64(((uint64_t*)m)[12], 13) 
-                 + 0x5ffffffffffffffaULL 
-                )^ ((uint64_t*)h)[ 9] 
-               )
-               +       q[ 2]  + R64_1(q[ 3]) +       q[ 4]  + R64_2(q[ 5])
-               +       q[ 6]  + R64_3(q[ 7]) +       q[ 8]  + R64_4(q[ 9])
-               +       q[10]  + R64_5(q[11]) +       q[12]  + R64_6(q[13])
-               +       q[14]  + R64_7(q[15]) + S64_4(q[16]) + S64_5(q[17]);
-/* expand_2( 3) */
-       q[19] = 
-               ((  ROTL64(((uint64_t*)m)[ 3], 4) 
-                 + ROTL64(((uint64_t*)m)[ 6], 7) 
-                 - ROTL64(((uint64_t*)m)[13], 14) 
-                 + 0x655555555555554fULL 
-                )^ ((uint64_t*)h)[10] 
-               )
-               +       q[ 3]  + R64_1(q[ 4]) +       q[ 5]  + R64_2(q[ 6])
-               +       q[ 7]  + R64_3(q[ 8]) +       q[ 9]  + R64_4(q[10])
-               +       q[11]  + R64_5(q[12]) +       q[13]  + R64_6(q[14])
-               +       q[15]  + R64_7(q[16]) + S64_4(q[17]) + S64_5(q[18]);
-/* expand_2( 4) */
-       q[20] = 
-               ((  ROTL64(((uint64_t*)m)[ 4], 5) 
-                 + ROTL64(((uint64_t*)m)[ 7], 8) 
-                 - ROTL64(((uint64_t*)m)[14], 15) 
-                 + 0x6aaaaaaaaaaaaaa4ULL 
-                )^ ((uint64_t*)h)[11] 
-               )
-               +       q[ 4]  + R64_1(q[ 5]) +       q[ 6]  + R64_2(q[ 7])
-               +       q[ 8]  + R64_3(q[ 9]) +       q[10]  + R64_4(q[11])
-               +       q[12]  + R64_5(q[13]) +       q[14]  + R64_6(q[15])
-               +       q[16]  + R64_7(q[17]) + S64_4(q[18]) + S64_5(q[19]);
-/* expand_2( 5) */
-       q[21] = 
-               ((  ROTL64(((uint64_t*)m)[ 5], 6) 
-                 + ROTL64(((uint64_t*)m)[ 8], 9) 
-                 - ROTL64(((uint64_t*)m)[15], 16) 
-                 + 0x6ffffffffffffff9ULL 
-                )^ ((uint64_t*)h)[12] 
-               )
-               +       q[ 5]  + R64_1(q[ 6]) +       q[ 7]  + R64_2(q[ 8])
-               +       q[ 9]  + R64_3(q[10]) +       q[11]  + R64_4(q[12])
-               +       q[13]  + R64_5(q[14]) +       q[15]  + R64_6(q[16])
-               +       q[17]  + R64_7(q[18]) + S64_4(q[19]) + S64_5(q[20]);
-/* expand_2( 6) */
-       q[22] = 
-               ((  ROTL64(((uint64_t*)m)[ 6], 7) 
-                 + ROTL64(((uint64_t*)m)[ 9], 10) 
-                 - ROTL64(((uint64_t*)m)[ 0], 1) 
-                 + 0x755555555555554eULL 
-                )^ ((uint64_t*)h)[13] 
-               )
-               +       q[ 6]  + R64_1(q[ 7]) +       q[ 8]  + R64_2(q[ 9])
-               +       q[10]  + R64_3(q[11]) +       q[12]  + R64_4(q[13])
-               +       q[14]  + R64_5(q[15]) +       q[16]  + R64_6(q[17])
-               +       q[18]  + R64_7(q[19]) + S64_4(q[20]) + S64_5(q[21]);
-/* expand_2( 7) */
-       q[23] = 
-               ((  ROTL64(((uint64_t*)m)[ 7], 8) 
-                 + ROTL64(((uint64_t*)m)[10], 11) 
-                 - ROTL64(((uint64_t*)m)[ 1], 2) 
-                 + 0x7aaaaaaaaaaaaaa3ULL 
-                )^ ((uint64_t*)h)[14] 
-               )
-               +       q[ 7]  + R64_1(q[ 8]) +       q[ 9]  + R64_2(q[10])
-               +       q[11]  + R64_3(q[12]) +       q[13]  + R64_4(q[14])
-               +       q[15]  + R64_5(q[16]) +       q[17]  + R64_6(q[18])
-               +       q[19]  + R64_7(q[20]) + S64_4(q[21]) + S64_5(q[22]);
-/* expand_2( 8) */
-       q[24] = 
-               ((  ROTL64(((uint64_t*)m)[ 8], 9) 
-                 + ROTL64(((uint64_t*)m)[11], 12) 
-                 - ROTL64(((uint64_t*)m)[ 2], 3) 
-                 + 0x7ffffffffffffff8ULL 
-                )^ ((uint64_t*)h)[15] 
-               )
-               +       q[ 8]  + R64_1(q[ 9]) +       q[10]  + R64_2(q[11])
-               +       q[12]  + R64_3(q[13]) +       q[14]  + R64_4(q[15])
-               +       q[16]  + R64_5(q[17]) +       q[18]  + R64_6(q[19])
-               +       q[20]  + R64_7(q[21]) + S64_4(q[22]) + S64_5(q[23]);
-/* expand_2( 9) */
-       q[25] = 
-               ((  ROTL64(((uint64_t*)m)[ 9], 10) 
-                 + ROTL64(((uint64_t*)m)[12], 13) 
-                 - ROTL64(((uint64_t*)m)[ 3], 4) 
-                 + 0x855555555555554dULL 
-                )^ ((uint64_t*)h)[ 0] 
-               )
-               +       q[ 9]  + R64_1(q[10]) +       q[11]  + R64_2(q[12])
-               +       q[13]  + R64_3(q[14]) +       q[15]  + R64_4(q[16])
-               +       q[17]  + R64_5(q[18]) +       q[19]  + R64_6(q[20])
-               +       q[21]  + R64_7(q[22]) + S64_4(q[23]) + S64_5(q[24]);
-/* expand_2(10) */
-       q[26] = 
-               ((  ROTL64(((uint64_t*)m)[10], 11) 
-                 + ROTL64(((uint64_t*)m)[13], 14) 
-                 - ROTL64(((uint64_t*)m)[ 4], 5) 
-                 + 0x8aaaaaaaaaaaaaa2ULL 
-                )^ ((uint64_t*)h)[ 1] 
-               )
-               +       q[10]  + R64_1(q[11]) +       q[12]  + R64_2(q[13])
-               +       q[14]  + R64_3(q[15]) +       q[16]  + R64_4(q[17])
-               +       q[18]  + R64_5(q[19]) +       q[20]  + R64_6(q[21])
-               +       q[22]  + R64_7(q[23]) + S64_4(q[24]) + S64_5(q[25]);
-/* expand_2(11) */
-       q[27] = 
-               ((  ROTL64(((uint64_t*)m)[11], 12) 
-                 + ROTL64(((uint64_t*)m)[14], 15) 
-                 - ROTL64(((uint64_t*)m)[ 5], 6) 
-                 + 0x8ffffffffffffff7ULL 
-                )^ ((uint64_t*)h)[ 2] 
-               )
-               +       q[11]  + R64_1(q[12]) +       q[13]  + R64_2(q[14])
-               +       q[15]  + R64_3(q[16]) +       q[17]  + R64_4(q[18])
-               +       q[19]  + R64_5(q[20]) +       q[21]  + R64_6(q[22])
-               +       q[23]  + R64_7(q[24]) + S64_4(q[25]) + S64_5(q[26]);
-/* expand_2(12) */
-       q[28] = 
-               ((  ROTL64(((uint64_t*)m)[12], 13) 
-                 + ROTL64(((uint64_t*)m)[15], 16) 
-                 - ROTL64(((uint64_t*)m)[ 6], 7) 
-                 + 0x955555555555554cULL 
-                )^ ((uint64_t*)h)[ 3] 
-               )
-               +       q[12]  + R64_1(q[13]) +       q[14]  + R64_2(q[15])
-               +       q[16]  + R64_3(q[17]) +       q[18]  + R64_4(q[19])
-               +       q[20]  + R64_5(q[21]) +       q[22]  + R64_6(q[23])
-               +       q[24]  + R64_7(q[25]) + S64_4(q[26]) + S64_5(q[27]);
-/* expand_2(13) */
-       q[29] = 
-               ((  ROTL64(((uint64_t*)m)[13], 14) 
-                 + ROTL64(((uint64_t*)m)[ 0], 1) 
-                 - ROTL64(((uint64_t*)m)[ 7], 8) 
-                 + 0x9aaaaaaaaaaaaaa1ULL 
-                )^ ((uint64_t*)h)[ 4] 
-               )
-               +       q[13]  + R64_1(q[14]) +       q[15]  + R64_2(q[16])
-               +       q[17]  + R64_3(q[18]) +       q[19]  + R64_4(q[20])
-               +       q[21]  + R64_5(q[22]) +       q[23]  + R64_6(q[24])
-               +       q[25]  + R64_7(q[26]) + S64_4(q[27]) + S64_5(q[28]);
-/* expand_2(14) */
-       q[30] = 
-               ((  ROTL64(((uint64_t*)m)[14], 15) 
-                 + ROTL64(((uint64_t*)m)[ 1], 2) 
-                 - ROTL64(((uint64_t*)m)[ 8], 9) 
-                 + 0x9ffffffffffffff6ULL 
-                )^ ((uint64_t*)h)[ 5] 
-               )
-               +       q[14]  + R64_1(q[15]) +       q[16]  + R64_2(q[17])
-               +       q[18]  + R64_3(q[19]) +       q[20]  + R64_4(q[21])
-               +       q[22]  + R64_5(q[23]) +       q[24]  + R64_6(q[25])
-               +       q[26]  + R64_7(q[27]) + S64_4(q[28]) + S64_5(q[29]);
-/* expand_2(15) */
-       q[31] = 
-               ((  ROTL64(((uint64_t*)m)[15], 16) 
-                 + ROTL64(((uint64_t*)m)[ 2], 3) 
-                 - ROTL64(((uint64_t*)m)[ 9], 10) 
-                 + 0xa55555555555554bULL 
-                )^ ((uint64_t*)h)[ 6] 
-               )
-               +       q[15]  + R64_1(q[16]) +       q[17]  + R64_2(q[18])
-               +       q[19]  + R64_3(q[20]) +       q[21]  + R64_4(q[22])
-               +       q[23]  + R64_5(q[24]) +       q[25]  + R64_6(q[26])
-               +       q[27]  + R64_7(q[28]) + S64_4(q[29]) + S64_5(q[30]);
-}
-
-/* END of automatic generated code */
-
diff --git a/bmw/f1_autogen_large.i b/bmw/f1_autogen_large.i
new file mode 100644 (file)
index 0000000..75eff07
--- /dev/null
@@ -0,0 +1,200 @@
+/* BEGIN of automatic generated code */
+
+static inline /* Fills q[16..31] in place from q[0..15], m and h; each q[16+j] is built from the 16 q words before it. */
+void bmw_large_f1(uint64_t* q, const void* m, const void* h){ /* m, h are read as uint64_t[16] via casts that drop const. NOTE(review): presumably 8-byte aligned - confirm with callers. */
+/* expand_1( 0): q[16] from q[0..15]. Head term of step j: ((ROTL64(m[j],j+1) + ROTL64(m[j+3],j+4) - ROTL64(m[j+10],j+11) + K) ^ h[j+7]), indices mod 16. */
+       q[16] = 
+               ((  ROTL64(((uint64_t*)m)[ 0], 1) 
+                 + ROTL64(((uint64_t*)m)[ 3], 4) 
+                 - ROTL64(((uint64_t*)m)[10], 11) 
+                 + 0x5555555555555550ULL /* K = 16 * 0x0555555555555555; every later step adds another 0x0555555555555555 */
+                )^ ((uint64_t*)h)[ 7] 
+               )
+               + S64_1(q[ 0]) + S64_2(q[ 1]) + S64_3(q[ 2]) + S64_0(q[ 3]) /* S64_1, S64_2, S64_3, S64_0 repeat over all 16 inputs */
+               + S64_1(q[ 4]) + S64_2(q[ 5]) + S64_3(q[ 6]) + S64_0(q[ 7])
+               + S64_1(q[ 8]) + S64_2(q[ 9]) + S64_3(q[10]) + S64_0(q[11])
+               + S64_1(q[12]) + S64_2(q[13]) + S64_3(q[14]) + S64_0(q[15]);
+/* expand_1( 1): q[17] from q[1..16]; same shape as step 0 with the window shifted by one */
+       q[17] = 
+               ((  ROTL64(((uint64_t*)m)[ 1], 2) 
+                 + ROTL64(((uint64_t*)m)[ 4], 5) 
+                 - ROTL64(((uint64_t*)m)[11], 12) 
+                 + 0x5aaaaaaaaaaaaaa5ULL 
+                )^ ((uint64_t*)h)[ 8] 
+               )
+               + S64_1(q[ 1]) + S64_2(q[ 2]) + S64_3(q[ 3]) + S64_0(q[ 4])
+               + S64_1(q[ 5]) + S64_2(q[ 6]) + S64_3(q[ 7]) + S64_0(q[ 8])
+               + S64_1(q[ 9]) + S64_2(q[10]) + S64_3(q[11]) + S64_0(q[12])
+               + S64_1(q[13]) + S64_2(q[14]) + S64_3(q[15]) + S64_0(q[16]);
+/* expand_2( 2): q[18] from q[2..17]. From here on window offsets 0,2,..,12 are added unchanged, offsets 1,3,..,13 go through R64_1..R64_7, offsets 14 and 15 through S64_4 and S64_5. */
+       q[18] = 
+               ((  ROTL64(((uint64_t*)m)[ 2], 3) 
+                 + ROTL64(((uint64_t*)m)[ 5], 6) 
+                 - ROTL64(((uint64_t*)m)[12], 13) 
+                 + 0x5ffffffffffffffaULL 
+                )^ ((uint64_t*)h)[ 9] 
+               )
+               +       q[ 2]  + R64_1(q[ 3]) +       q[ 4]  + R64_2(q[ 5])
+               +       q[ 6]  + R64_3(q[ 7]) +       q[ 8]  + R64_4(q[ 9])
+               +       q[10]  + R64_5(q[11]) +       q[12]  + R64_6(q[13])
+               +       q[14]  + R64_7(q[15]) + S64_4(q[16]) + S64_5(q[17]);
+/* expand_2( 3): q[19] from q[3..18] */
+       q[19] = 
+               ((  ROTL64(((uint64_t*)m)[ 3], 4) 
+                 + ROTL64(((uint64_t*)m)[ 6], 7) 
+                 - ROTL64(((uint64_t*)m)[13], 14) 
+                 + 0x655555555555554fULL 
+                )^ ((uint64_t*)h)[10] 
+               )
+               +       q[ 3]  + R64_1(q[ 4]) +       q[ 5]  + R64_2(q[ 6])
+               +       q[ 7]  + R64_3(q[ 8]) +       q[ 9]  + R64_4(q[10])
+               +       q[11]  + R64_5(q[12]) +       q[13]  + R64_6(q[14])
+               +       q[15]  + R64_7(q[16]) + S64_4(q[17]) + S64_5(q[18]);
+/* expand_2( 4): q[20] from q[4..19] */
+       q[20] = 
+               ((  ROTL64(((uint64_t*)m)[ 4], 5) 
+                 + ROTL64(((uint64_t*)m)[ 7], 8) 
+                 - ROTL64(((uint64_t*)m)[14], 15) 
+                 + 0x6aaaaaaaaaaaaaa4ULL 
+                )^ ((uint64_t*)h)[11] 
+               )
+               +       q[ 4]  + R64_1(q[ 5]) +       q[ 6]  + R64_2(q[ 7])
+               +       q[ 8]  + R64_3(q[ 9]) +       q[10]  + R64_4(q[11])
+               +       q[12]  + R64_5(q[13]) +       q[14]  + R64_6(q[15])
+               +       q[16]  + R64_7(q[17]) + S64_4(q[18]) + S64_5(q[19]);
+/* expand_2( 5): q[21] from q[5..20] */
+       q[21] = 
+               ((  ROTL64(((uint64_t*)m)[ 5], 6) 
+                 + ROTL64(((uint64_t*)m)[ 8], 9) 
+                 - ROTL64(((uint64_t*)m)[15], 16) 
+                 + 0x6ffffffffffffff9ULL 
+                )^ ((uint64_t*)h)[12] 
+               )
+               +       q[ 5]  + R64_1(q[ 6]) +       q[ 7]  + R64_2(q[ 8])
+               +       q[ 9]  + R64_3(q[10]) +       q[11]  + R64_4(q[12])
+               +       q[13]  + R64_5(q[14]) +       q[15]  + R64_6(q[16])
+               +       q[17]  + R64_7(q[18]) + S64_4(q[19]) + S64_5(q[20]);
+/* expand_2( 6): q[22] from q[6..21]; the subtracted message index wraps to m[0] here */
+       q[22] = 
+               ((  ROTL64(((uint64_t*)m)[ 6], 7) 
+                 + ROTL64(((uint64_t*)m)[ 9], 10) 
+                 - ROTL64(((uint64_t*)m)[ 0], 1) 
+                 + 0x755555555555554eULL 
+                )^ ((uint64_t*)h)[13] 
+               )
+               +       q[ 6]  + R64_1(q[ 7]) +       q[ 8]  + R64_2(q[ 9])
+               +       q[10]  + R64_3(q[11]) +       q[12]  + R64_4(q[13])
+               +       q[14]  + R64_5(q[15]) +       q[16]  + R64_6(q[17])
+               +       q[18]  + R64_7(q[19]) + S64_4(q[20]) + S64_5(q[21]);
+/* expand_2( 7): q[23] from q[7..22] */
+       q[23] = 
+               ((  ROTL64(((uint64_t*)m)[ 7], 8) 
+                 + ROTL64(((uint64_t*)m)[10], 11) 
+                 - ROTL64(((uint64_t*)m)[ 1], 2) 
+                 + 0x7aaaaaaaaaaaaaa3ULL 
+                )^ ((uint64_t*)h)[14] 
+               )
+               +       q[ 7]  + R64_1(q[ 8]) +       q[ 9]  + R64_2(q[10])
+               +       q[11]  + R64_3(q[12]) +       q[13]  + R64_4(q[14])
+               +       q[15]  + R64_5(q[16]) +       q[17]  + R64_6(q[18])
+               +       q[19]  + R64_7(q[20]) + S64_4(q[21]) + S64_5(q[22]);
+/* expand_2( 8): q[24] from q[8..23] */
+       q[24] = 
+               ((  ROTL64(((uint64_t*)m)[ 8], 9) 
+                 + ROTL64(((uint64_t*)m)[11], 12) 
+                 - ROTL64(((uint64_t*)m)[ 2], 3) 
+                 + 0x7ffffffffffffff8ULL 
+                )^ ((uint64_t*)h)[15] 
+               )
+               +       q[ 8]  + R64_1(q[ 9]) +       q[10]  + R64_2(q[11])
+               +       q[12]  + R64_3(q[13]) +       q[14]  + R64_4(q[15])
+               +       q[16]  + R64_5(q[17]) +       q[18]  + R64_6(q[19])
+               +       q[20]  + R64_7(q[21]) + S64_4(q[22]) + S64_5(q[23]);
+/* expand_2( 9): q[25] from q[9..24]; the h index wraps to h[0] here */
+       q[25] = 
+               ((  ROTL64(((uint64_t*)m)[ 9], 10) 
+                 + ROTL64(((uint64_t*)m)[12], 13) 
+                 - ROTL64(((uint64_t*)m)[ 3], 4) 
+                 + 0x855555555555554dULL 
+                )^ ((uint64_t*)h)[ 0] 
+               )
+               +       q[ 9]  + R64_1(q[10]) +       q[11]  + R64_2(q[12])
+               +       q[13]  + R64_3(q[14]) +       q[15]  + R64_4(q[16])
+               +       q[17]  + R64_5(q[18]) +       q[19]  + R64_6(q[20])
+               +       q[21]  + R64_7(q[22]) + S64_4(q[23]) + S64_5(q[24]);
+/* expand_2(10): q[26] from q[10..25] */
+       q[26] = 
+               ((  ROTL64(((uint64_t*)m)[10], 11) 
+                 + ROTL64(((uint64_t*)m)[13], 14) 
+                 - ROTL64(((uint64_t*)m)[ 4], 5) 
+                 + 0x8aaaaaaaaaaaaaa2ULL 
+                )^ ((uint64_t*)h)[ 1] 
+               )
+               +       q[10]  + R64_1(q[11]) +       q[12]  + R64_2(q[13])
+               +       q[14]  + R64_3(q[15]) +       q[16]  + R64_4(q[17])
+               +       q[18]  + R64_5(q[19]) +       q[20]  + R64_6(q[21])
+               +       q[22]  + R64_7(q[23]) + S64_4(q[24]) + S64_5(q[25]);
+/* expand_2(11): q[27] from q[11..26] */
+       q[27] = 
+               ((  ROTL64(((uint64_t*)m)[11], 12) 
+                 + ROTL64(((uint64_t*)m)[14], 15) 
+                 - ROTL64(((uint64_t*)m)[ 5], 6) 
+                 + 0x8ffffffffffffff7ULL 
+                )^ ((uint64_t*)h)[ 2] 
+               )
+               +       q[11]  + R64_1(q[12]) +       q[13]  + R64_2(q[14])
+               +       q[15]  + R64_3(q[16]) +       q[17]  + R64_4(q[18])
+               +       q[19]  + R64_5(q[20]) +       q[21]  + R64_6(q[22])
+               +       q[23]  + R64_7(q[24]) + S64_4(q[25]) + S64_5(q[26]);
+/* expand_2(12): q[28] from q[12..27] */
+       q[28] = 
+               ((  ROTL64(((uint64_t*)m)[12], 13) 
+                 + ROTL64(((uint64_t*)m)[15], 16) 
+                 - ROTL64(((uint64_t*)m)[ 6], 7) 
+                 + 0x955555555555554cULL 
+                )^ ((uint64_t*)h)[ 3] 
+               )
+               +       q[12]  + R64_1(q[13]) +       q[14]  + R64_2(q[15])
+               +       q[16]  + R64_3(q[17]) +       q[18]  + R64_4(q[19])
+               +       q[20]  + R64_5(q[21]) +       q[22]  + R64_6(q[23])
+               +       q[24]  + R64_7(q[25]) + S64_4(q[26]) + S64_5(q[27]);
+/* expand_2(13): q[29] from q[13..28]; the second added message index wraps to m[0] here */
+       q[29] = 
+               ((  ROTL64(((uint64_t*)m)[13], 14) 
+                 + ROTL64(((uint64_t*)m)[ 0], 1) 
+                 - ROTL64(((uint64_t*)m)[ 7], 8) 
+                 + 0x9aaaaaaaaaaaaaa1ULL 
+                )^ ((uint64_t*)h)[ 4] 
+               )
+               +       q[13]  + R64_1(q[14]) +       q[15]  + R64_2(q[16])
+               +       q[17]  + R64_3(q[18]) +       q[19]  + R64_4(q[20])
+               +       q[21]  + R64_5(q[22]) +       q[23]  + R64_6(q[24])
+               +       q[25]  + R64_7(q[26]) + S64_4(q[27]) + S64_5(q[28]);
+/* expand_2(14): q[30] from q[14..29] */
+       q[30] = 
+               ((  ROTL64(((uint64_t*)m)[14], 15) 
+                 + ROTL64(((uint64_t*)m)[ 1], 2) 
+                 - ROTL64(((uint64_t*)m)[ 8], 9) 
+                 + 0x9ffffffffffffff6ULL 
+                )^ ((uint64_t*)h)[ 5] 
+               )
+               +       q[14]  + R64_1(q[15]) +       q[16]  + R64_2(q[17])
+               +       q[18]  + R64_3(q[19]) +       q[20]  + R64_4(q[21])
+               +       q[22]  + R64_5(q[23]) +       q[24]  + R64_6(q[25])
+               +       q[26]  + R64_7(q[27]) + S64_4(q[28]) + S64_5(q[29]);
+/* expand_2(15): q[31] from q[15..30]; last step, constant is 31 * 0x0555555555555555 */
+       q[31] = 
+               ((  ROTL64(((uint64_t*)m)[15], 16) 
+                 + ROTL64(((uint64_t*)m)[ 2], 3) 
+                 - ROTL64(((uint64_t*)m)[ 9], 10) 
+                 + 0xa55555555555554bULL 
+                )^ ((uint64_t*)h)[ 6] 
+               )
+               +       q[15]  + R64_1(q[16]) +       q[17]  + R64_2(q[18])
+               +       q[19]  + R64_3(q[20]) +       q[21]  + R64_4(q[22])
+               +       q[23]  + R64_5(q[24]) +       q[25]  + R64_6(q[26])
+               +       q[27]  + R64_7(q[28]) + S64_4(q[29]) + S64_5(q[30]);
+}
+
+/* END of automatic generated code */
+
index 91c0effd85253d11e96d5474385800f914725f72..0c03b771dff2d7bf9943db9ea2e23fcee0ffe1cd 100644 (file)
@@ -29,7 +29,6 @@
 #include "ubi.h"
 #include "skein.h"
 
-#include "cli.h"
 
 void skein256_init(skein256_ctx_t* ctx, uint16_t outsize_b){
        skein_config_t conf;