- for(i=16; i<4*4+16; i+=4){
- t = ctx->a[i];
- ctx->a[i-16] ^= t;
- ctx->a[i] = ctx->a[i+2] + ctx->a[i-16];
- ctx->a[i-16] = rotate11left(ctx->a[i-16]);
- ctx->a[i-14] ^= ctx->a[i+2];
- ctx->a[i+2] = t + ctx->a[i-14];
- ctx->a[i-14] = rotate11left(ctx->a[i-14]);
- t = ctx->a[i+1];
- ctx->a[i-15] ^= t;
- ctx->a[i+1] = ctx->a[i+3] + ctx->a[i-15];
- ctx->a[i-15] = rotate11left(ctx->a[i-15]);
- ctx->a[i-13] ^= ctx->a[i+3];
- ctx->a[i+3] = t + ctx->a[i-13];
- ctx->a[i-13] = rotate11left(ctx->a[i-13]);
- }
- for(i=0; i<4; ++i){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+4];
- ctx->a[i+4] = t;
- }
- for(i=8; i<4+8; ++i){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+4];
- ctx->a[i+4] = t;
- }
- for(i=16; i<16+16; i+=2){
- ctx->a[i-16] ^= t = ctx->a[i];
- ctx->a[i-15] ^= ctx->a[i] = ctx->a[i+1];
- ctx->a[i+1] = t;
+ xchg32_array(&(ctx->a[0]), &(ctx->a[4]), 4);
+ xchg32_array(&(ctx->a[8]), &(ctx->a[12]), 4);
+ for(i=0; i<16; i+=2){
+ ctx->a[i] ^= t = ctx->a[i+16];
+ ctx->a[i+1] ^= ctx->a[i+16] = ctx->a[i+17];
+ ctx->a[i+17] = t;