voidMY_MMult( int m, int n, int k, double *a, int lda, double *b, int ldb, double *c, int ldc ) { int i, j;
for ( j=0; j<n; j+=4 ){ /* Loop over the columns of C, unrolled by 4 */ for ( i=0; i<m; i+=4 ){ /* Loop over the rows of C */ /* Update C( i,j ), C( i,j+1 ), C( i,j+2 ), and C( i,j+3 ) in one routine (four inner products) */
voidMY_MMult( int m, int n, int k, double *a, int lda, double *b, int ldb, double *c, int ldc ) { int i, j;
for ( j=0; j<n; j+=4 ){ /* Loop over the columns of C, unrolled by 4 */ for ( i=0; i<m; i+=4 ){ /* Loop over the rows of C */ /* Update C( i,j ), C( i,j+1 ), C( i,j+2 ), and C( i,j+3 ) in one routine (four inner products) */
voidMY_MMult( int m, int n, int k, double *a, int lda, double *b, int ldb, double *c, int ldc ) { int i, j;
for ( j=0; j<n; j+=4 ){ /* Loop over the columns of C, unrolled by 4 */ for ( i=0; i<m; i+=4 ){ /* Loop over the rows of C */ /* Update C( i,j ), C( i,j+1 ), C( i,j+2 ), and C( i,j+3 ) in one routine (four inner products) */