- state->s[4*0+i] =
- gf256mul(0xe, tmp[4*0+i], 0x1b)
- ^ gf256mul(0xb, tmp[4*1+i], 0x1b)
- ^ gf256mul(0xd, tmp[4*2+i], 0x1b)
- ^ gf256mul(0x9, tmp[4*3+i], 0x1b);
- state->s[4*1+i] =
- gf256mul(0x9, tmp[4*0+i], 0x1b)
- ^ gf256mul(0xe, tmp[4*1+i], 0x1b)
- ^ gf256mul(0xb, tmp[4*2+i], 0x1b)
- ^ gf256mul(0xd, tmp[4*3+i], 0x1b);
- state->s[4*2+i] =
- gf256mul(0xd, tmp[4*0+i], 0x1b)
- ^ gf256mul(0x9, tmp[4*1+i], 0x1b)
- ^ gf256mul(0xe, tmp[4*2+i], 0x1b)
- ^ gf256mul(0xb, tmp[4*3+i], 0x1b);
- state->s[4*3+i] =
- gf256mul(0xb, tmp[4*0+i], 0x1b)
- ^ gf256mul(0xd, tmp[4*1+i], 0x1b)
- ^ gf256mul(0x9, tmp[4*2+i], 0x1b)
- ^ gf256mul(0xe, tmp[4*3+i], 0x1b);
+ t = tmp[4*i+3] ^ tmp[4*i+2]; /* t = a3 ^ a2, writing a_j for tmp[4*i+j] */
+ u = tmp[4*i+1] ^ tmp[4*i+0]; /* u = a1 ^ a0 */
+ v = t ^ u; /* v = a0 ^ a1 ^ a2 ^ a3 */
+ v = gf256mul(0x09, v, 0x1b); /* v = 0x09 times the XOR of all four bytes; this term feeds every output below */
+ w = v ^ gf256mul(0x04, tmp[4*i+2] ^ tmp[4*i+0], 0x1b); /* w = v ^ 0x04 times (a2 ^ a0); used by outputs 2 and 0 */
+ v = v ^ gf256mul(0x04, tmp[4*i+3] ^ tmp[4*i+1], 0x1b); /* v ^= 0x04 times (a3 ^ a1); used by outputs 3 and 1. w must be computed before this overwrite */
+ state->s[4*i+3] = tmp[4*i+3] ^ v ^ gf256mul(0x02, tmp[4*i+0] ^ tmp[4*i+3], 0x1b); /* expands to 0xb a0 ^ 0xd a1 ^ 0x9 a2 ^ 0xe a3, assuming gf256mul distributes over ^ (helper not visible here; confirm) */
+ state->s[4*i+2] = tmp[4*i+2] ^ w ^ gf256mul(0x02, t, 0x1b); /* expands to 0xd a0 ^ 0x9 a1 ^ 0xe a2 ^ 0xb a3 under the same assumption */
+ state->s[4*i+1] = tmp[4*i+1] ^ v ^ gf256mul(0x02, tmp[4*i+2] ^ tmp[4*i+1], 0x1b); /* expands to 0x9 a0 ^ 0xe a1 ^ 0xb a2 ^ 0xd a3 under the same assumption */
+ state->s[4*i+0] = tmp[4*i+0] ^ w ^ gf256mul(0x02, u, 0x1b); /* expands to 0xe a0 ^ 0xb a1 ^ 0xd a2 ^ 0x9 a3 under the same assumption */
+
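+ /* Direct per-term form (one gf256mul per coefficient) that the factored assignments above are meant to replace: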
+ state->s[4*i+0] =
+ gf256mul(0xe, tmp[4*i+0], 0x1b)
+ ^ gf256mul(0xb, tmp[4*i+1], 0x1b)
+ ^ gf256mul(0xd, tmp[4*i+2], 0x1b)
+ ^ gf256mul(0x9, tmp[4*i+3], 0x1b);
+ state->s[4*i+1] =
+ gf256mul(0x9, tmp[4*i+0], 0x1b)
+ ^ gf256mul(0xe, tmp[4*i+1], 0x1b)
+ ^ gf256mul(0xb, tmp[4*i+2], 0x1b)
+ ^ gf256mul(0xd, tmp[4*i+3], 0x1b);
+ state->s[4*i+2] =
+ gf256mul(0xd, tmp[4*i+0], 0x1b)
+ ^ gf256mul(0x9, tmp[4*i+1], 0x1b)
+ ^ gf256mul(0xe, tmp[4*i+2], 0x1b)
+ ^ gf256mul(0xb, tmp[4*i+3], 0x1b);
+ state->s[4*i+3] =
+ gf256mul(0xb, tmp[4*i+0], 0x1b)
+ ^ gf256mul(0xd, tmp[4*i+1], 0x1b)
+ ^ gf256mul(0x9, tmp[4*i+2], 0x1b)
+ ^ gf256mul(0xe, tmp[4*i+3], 0x1b);
+ */