- for(i=0; i<16; ++i){
- ctx->a[i] ^= ctx->a[i+16];
- }
- for(i=16; i<4*4+16; i+=4){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+2];
- ctx->a[i+2] = t;
- t = ctx->a[i+1];
- ctx->a[i+1] = ctx->a[i+3];
- ctx->a[i+3] = t;
- }
- for(i=0; i<16; ++i){
- ctx->a[i+16] += ctx->a[i];
- ctx->a[i] = rotate11left(ctx->a[i]);
- }
- for(i=0; i<4; ++i){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+4];
- ctx->a[i+4] = t;
- }
- for(i=8; i<4+8; ++i){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+4];
- ctx->a[i+4] = t;
- }
- for(i=0; i<16; ++i){
- ctx->a[i] ^= ctx->a[i+16];
- }
- for(i=16; i<16+16; i+=2){
- t = ctx->a[i];
- ctx->a[i] = ctx->a[i+1];
- ctx->a[i+1] = t;
+ xchg32_array(&(ctx->a[0]), &(ctx->a[4]), 4);
+ xchg32_array(&(ctx->a[8]), &(ctx->a[12]), 4);
+ for(i=0; i<16; i+=2){
+ ctx->a[i] ^= t = ctx->a[i+16];
+ ctx->a[i+1] ^= ctx->a[i+16] = ctx->a[i+17];
+ ctx->a[i+17] = t;