- blocks[i].d[0] = BSWAP4(ctx->A[i]); ctx->A[i] = key->tail.h[0];
- blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h[1];
- blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h[2];
- blocks[i].d[3] = BSWAP4(ctx->D[i]); ctx->D[i] = key->tail.h[3];
- blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h[4];
- blocks[i].d[5] = BSWAP4(ctx->F[i]); ctx->F[i] = key->tail.h[5];
- blocks[i].d[6] = BSWAP4(ctx->G[i]); ctx->G[i] = key->tail.h[6];
- blocks[i].d[7] = BSWAP4(ctx->H[i]); ctx->H[i] = key->tail.h[7];
+ PUTU32(blocks[i].c+0,ctx->A[i]); ctx->A[i] = key->tail.h[0];
+ PUTU32(blocks[i].c+4,ctx->B[i]); ctx->B[i] = key->tail.h[1];
+ PUTU32(blocks[i].c+8,ctx->C[i]); ctx->C[i] = key->tail.h[2];
+ PUTU32(blocks[i].c+12,ctx->D[i]); ctx->D[i] = key->tail.h[3];
+ PUTU32(blocks[i].c+16,ctx->E[i]); ctx->E[i] = key->tail.h[4];
+ PUTU32(blocks[i].c+20,ctx->F[i]); ctx->F[i] = key->tail.h[5];
+ PUTU32(blocks[i].c+24,ctx->G[i]); ctx->G[i] = key->tail.h[6];
+ PUTU32(blocks[i].c+28,ctx->H[i]); ctx->H[i] = key->tail.h[7];