+ t = tmp[4*i+3] ^ tmp[4*i+2];
+ u = tmp[4*i+1] ^ tmp[4*i+0];
+ v = t ^ u;
+ v = gf256mul(0x09, v, 0x1b);
+ w = v ^ gf256mul(0x04, tmp[4*i+2] ^ tmp[4*i+0], 0x1b);
+ v = v ^ gf256mul(0x04, tmp[4*i+3] ^ tmp[4*i+1], 0x1b);
+ state->s[4*i+3] = tmp[4*i+3] ^ v ^ gf256mul(0x02, tmp[4*i+0] ^ tmp[4*i+3], 0x1b);
+ state->s[4*i+2] = tmp[4*i+2] ^ w ^ gf256mul(0x02, t, 0x1b);
+ state->s[4*i+1] = tmp[4*i+1] ^ v ^ gf256mul(0x02, tmp[4*i+2] ^ tmp[4*i+1], 0x1b);
+ state->s[4*i+0] = tmp[4*i+0] ^ w ^ gf256mul(0x02, u, 0x1b);
+
+ /*