BLISlab中C矩阵初始化速度测试
假设我们想把 C 中的所有元素都设为 0。C 是按列主序存储的 m×n 双精度矩阵(元素 C(i,j) 对应 C[j*ldc + i],这里 m = n = 20000,ldc = m),下面比较四种写法的耗时。
统一编译命令:
| g++ xxx.cpp -O0 -std=c++11 -o xxx
|
版本一:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
// Version 1: zero-fill an m-by-n column-major matrix of doubles with the ROW
// index in the outer loop, so consecutive writes of the inner loop are `ldc`
// elements apart (strided access). Prints the elapsed time in four units.
#include <immintrin.h>  // _mm_malloc / _mm_free

#include <chrono>
#include <cstddef>
#include <iostream>

// Column-major element access: element (i, j) lives at offset j*ldc + i.
#define C( i, j ) C[ (j)*ldc + (i) ]

int main()
{
    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock may be an alias of the adjustable system clock.
    using Clock = std::chrono::steady_clock;

    // Dimensions and loop counters stay plain, non-const ints so that the
    // timed loop below is the same code as the one that was measured.
    int i, j, m, n;
    m = 20000;
    n = 20000;
    int ldc = m;

    // Widen to size_t BEFORE multiplying so larger dimensions cannot
    // overflow int while computing the byte count.
    const std::size_t bytes =
        static_cast<std::size_t>(m) * static_cast<std::size_t>(n) * sizeof(double);

    // 32-byte aligned buffer (about 3.2 GB for 20000 x 20000 doubles).
    double* C = static_cast<double*>(_mm_malloc(bytes, 32));
    if (C == nullptr) {
        std::cerr << "内存分配失败" << '\n';
        return 1;
    }

    // NOTE(review): the buffer is never written before the clock starts, so
    // any first-touch cost of freshly allocated pages is presumably part of
    // the measurement -- confirm before comparing absolute numbers.
    const Clock::time_point tp1 = Clock::now();

    for( i = 0; i < m; i++){
        for( j = 0; j < n; j++){
            C(i,j) = 0.0;
        }
    }

    const Clock::time_point tp2 = Clock::now();
    const Clock::duration elapsed = tp2 - tp1;

    // Same interval reported as nanoseconds, microseconds, milliseconds and
    // seconds; duration_cast truncates toward zero.
    std::cout << "1 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() << " 纳秒" << '\n';
    std::cout << "2 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << " 微妙" << '\n';
    std::cout << "3 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << " 毫秒" << '\n';
    std::cout << "4 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::seconds>(elapsed).count() << " 秒钟" << '\n';

    // Memory from _mm_malloc must be released with _mm_free.
    _mm_free(C);
    return 0;
}
|
实测结果:
| 1 被计时代码耗时:10776901341 纳秒 2 被计时代码耗时:10776901 微妙 3 被计时代码耗时:10776 毫秒 4 被计时代码耗时:10 秒钟
|
版本二:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
// Version 2: zero-fill an m-by-n column-major matrix of doubles with the
// COLUMN index in the outer loop, walking each column with a pointer so that
// consecutive writes hit adjacent elements. Prints the elapsed time in four
// units.
#include <immintrin.h>  // _mm_malloc / _mm_free

#include <chrono>
#include <cstddef>
#include <iostream>

int main()
{
    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock may be an alias of the adjustable system clock.
    using Clock = std::chrono::steady_clock;

    // Dimensions and loop counters stay plain, non-const ints so that the
    // timed loop below is the same code as the one that was measured.
    int i, j, m, n;
    m = 20000;
    n = 20000;
    int ldc = m;  // leading dimension: distance (in elements) between columns

    // Widen to size_t BEFORE multiplying so larger dimensions cannot
    // overflow int while computing the byte count.
    const std::size_t bytes =
        static_cast<std::size_t>(m) * static_cast<std::size_t>(n) * sizeof(double);

    // 32-byte aligned buffer (about 3.2 GB for 20000 x 20000 doubles).
    double* C = static_cast<double*>(_mm_malloc(bytes, 32));
    if (C == nullptr) {
        std::cerr << "内存分配失败" << '\n';
        return 1;
    }

    double *cp;  // write cursor inside the current column

    // NOTE(review): the buffer is never written before the clock starts, so
    // any first-touch cost of freshly allocated pages is presumably part of
    // the measurement -- confirm before comparing absolute numbers.
    const Clock::time_point tp1 = Clock::now();

    for( j = 0; j < n; j++){
        cp = &C[ j*ldc ];  // start of column j
        for( i = 0; i < m; i++){
            *cp++ = 0.0;
        }
    }

    const Clock::time_point tp2 = Clock::now();
    const Clock::duration elapsed = tp2 - tp1;

    // Same interval reported as nanoseconds, microseconds, milliseconds and
    // seconds; duration_cast truncates toward zero.
    std::cout << "1 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() << " 纳秒" << '\n';
    std::cout << "2 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << " 微妙" << '\n';
    std::cout << "3 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << " 毫秒" << '\n';
    std::cout << "4 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::seconds>(elapsed).count() << " 秒钟" << '\n';

    // Memory from _mm_malloc must be released with _mm_free.
    _mm_free(C);
    return 0;
}
|
实测结果:
| 1 被计时代码耗时:3455968000 纳秒 2 被计时代码耗时:3455968 微妙 3 被计时代码耗时:3455 毫秒 4 被计时代码耗时:3 秒钟
|
版本三:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
// Version 3: zero-fill an m-by-n column-major matrix of doubles column by
// column, with the inner loop manually unrolled to write four adjacent
// elements per iteration. Prints the elapsed time in four units.
#include <immintrin.h>  // _mm_malloc / _mm_free

#include <chrono>
#include <cstddef>
#include <iostream>

int main()
{
    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock may be an alias of the adjustable system clock.
    using Clock = std::chrono::steady_clock;

    // Dimensions and loop counters stay plain, non-const ints so that the
    // timed loop below is the same code as the one that was measured.
    int i, j, m, n;
    m = 20000;
    n = 20000;
    int ldc = m;  // leading dimension: distance (in elements) between columns

    // Widen to size_t BEFORE multiplying so larger dimensions cannot
    // overflow int while computing the byte count.
    const std::size_t bytes =
        static_cast<std::size_t>(m) * static_cast<std::size_t>(n) * sizeof(double);

    // 32-byte aligned buffer (about 3.2 GB for 20000 x 20000 doubles).
    double* C = static_cast<double*>(_mm_malloc(bytes, 32));
    if (C == nullptr) {
        std::cerr << "内存分配失败" << '\n';
        return 1;
    }

    double *cp;  // write cursor inside the current column

    // NOTE(review): the buffer is never written before the clock starts, so
    // any first-touch cost of freshly allocated pages is presumably part of
    // the measurement -- confirm before comparing absolute numbers.
    const Clock::time_point tp1 = Clock::now();

    // The unrolled body always writes 4 elements, so m must be a multiple of
    // 4 (20000 is); otherwise the last iteration would run past the column.
    for( j = 0; j < n; j++){
        cp = &C[ j * ldc ];  // start of column j
        for( i = 0; i < m; i+=4){
            *(cp+0) = 0.0;
            *(cp+1) = 0.0;
            *(cp+2) = 0.0;
            *(cp+3) = 0.0;
            cp+=4;
        }
    }

    const Clock::time_point tp2 = Clock::now();
    const Clock::duration elapsed = tp2 - tp1;

    // Same interval reported as nanoseconds, microseconds, milliseconds and
    // seconds; duration_cast truncates toward zero.
    std::cout << "1 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() << " 纳秒" << '\n';
    std::cout << "2 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << " 微妙" << '\n';
    std::cout << "3 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << " 毫秒" << '\n';
    std::cout << "4 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::seconds>(elapsed).count() << " 秒钟" << '\n';

    // Memory from _mm_malloc must be released with _mm_free.
    _mm_free(C);
    return 0;
}
|
实测结果:
| 1 被计时代码耗时:2581347801 纳秒 2 被计时代码耗时:2581347 微妙 3 被计时代码耗时:2581 毫秒 4 被计时代码耗时:2 秒钟
|
版本四:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
// Version 4: same 4-way unrolled, column-by-column zero fill as version 3,
// but the stored values come from four `register`-qualified locals instead of
// literals. Prints the elapsed time in four units.
//
// Build with -std=c++11 (or c++14): the `register` storage class specifier is
// deprecated there and no longer allowed from C++17 on.
#include <immintrin.h>  // _mm_malloc / _mm_free

#include <chrono>
#include <cstddef>
#include <iostream>

int main()
{
    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock may be an alias of the adjustable system clock.
    using Clock = std::chrono::steady_clock;

    // Dimensions and loop counters stay plain, non-const ints so that the
    // timed loop below is the same code as the one that was measured.
    int i, j, m, n;
    m = 20000;
    n = 20000;
    int ldc = m;  // leading dimension: distance (in elements) between columns

    // Widen to size_t BEFORE multiplying so larger dimensions cannot
    // overflow int while computing the byte count.
    const std::size_t bytes =
        static_cast<std::size_t>(m) * static_cast<std::size_t>(n) * sizeof(double);

    // 32-byte aligned buffer (about 3.2 GB for 20000 x 20000 doubles).
    double* C = static_cast<double*>(_mm_malloc(bytes, 32));
    if (C == nullptr) {
        std::cerr << "内存分配失败" << '\n';
        return 1;
    }

    double *cp;  // write cursor inside the current column

    // NOTE(review): the buffer is never written before the clock starts, so
    // any first-touch cost of freshly allocated pages is presumably part of
    // the measurement -- confirm before comparing absolute numbers.
    const Clock::time_point tp1 = Clock::now();

    // Initialised after the clock starts, exactly as in the measured program.
    // `register` is kept on purpose: it is the point of this variant.
    register double c0 =0.0,c1=0.0,c2=0.0,c3=0.0;

    // The unrolled body always writes 4 elements, so m must be a multiple of
    // 4 (20000 is); otherwise the last iteration would run past the column.
    for( j = 0; j < n; j++){
        cp = &C[ j * ldc ];  // start of column j
        for( i = 0; i < m; i+=4){
            *(cp+0) = c0;
            *(cp+1) = c1;
            *(cp+2) = c2;
            *(cp+3) = c3;
            cp+=4;
        }
    }

    const Clock::time_point tp2 = Clock::now();
    const Clock::duration elapsed = tp2 - tp1;

    // Same interval reported as nanoseconds, microseconds, milliseconds and
    // seconds; duration_cast truncates toward zero.
    std::cout << "1 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() << " 纳秒" << '\n';
    std::cout << "2 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << " 微妙" << '\n';
    std::cout << "3 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << " 毫秒" << '\n';
    std::cout << "4 被计时代码耗时:" << std::chrono::duration_cast<std::chrono::seconds>(elapsed).count() << " 秒钟" << '\n';

    // Memory from _mm_malloc must be released with _mm_free.
    _mm_free(C);
    return 0;
}
|
实测结果:
| 1 被计时代码耗时:2269606428 纳秒 2 被计时代码耗时:2269606 微妙 3 被计时代码耗时:2269 毫秒 4 被计时代码耗时:2 秒钟
|
参考资料:
C++11 chrono 高精度计时方法 | GuKaifeng’s Blog