/*
 * Example program that shows how to create a multithreaded program in which
 * each thread will treat a part of an array.
 *
 * The code in this file provides the sequential building blocks:
 *   - elementWiseMultiplication:          result[i] = arr[i] * arr2[i]
 *   - elementWiseMultiplicationIterative: the same access pattern, but with
 *     nbrIterations multiplications per element (more work per byte loaded)
 *   - checkIfResultsAreTheSame:           compares two result arrays
 * main() times both kernels and prints the cost per operation.
 */
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iostream> // cout, ...
#include <string>
#include <vector>

using namespace std;
using namespace std::chrono;

const int MAX_NBR_ITERATIONS = 100;
constexpr int N = 1024 * 1024; // number of floats per array

std::int64_t elementWiseMultiplication(int n, float* arr, float* arr2, float* result);
std::int64_t elementWiseMultiplicationIterative(int n, int nbrIterations, float* arr, float* arr2, float* result);
void checkIfResultsAreTheSame(string name, float* arr3, float* arr4, int n, bool PRINT_DATA);

/**
 * Times the basic element-wise multiplication once, then times the iterative
 * variant for 10, 20, ..., MAX_NBR_ITERATIONS multiplications per element and
 * prints, for each run, the runtime, the cost per operation and the slowdown
 * relative to the basic version.
 */
int main() {
    cout << "Considering arrays of size " << N << endl;

    // allocation and initialization
    srand(static_cast<unsigned int>(time(nullptr))); // random initial seed

    // std::vector owns the buffers, so they are released on every exit path.
    // The result arrays are zero-initialized by the vector constructor
    // (actually, initialization is not necessary).
    vector<float> arr(N), arr2(N), result_seq(N), result_par(N), arr4(N), arr5(N), arr6(N);
    for (size_t i = 0; i < arr.size(); ++i) {
        // Same order of rand() calls as before: arr, arr2, arr4, arr5.
        arr[i] = static_cast<float>(rand());
        arr2[i] = static_cast<float>(rand());
        arr4[i] = static_cast<float>(rand());
        arr5[i] = static_cast<float>(rand());
    }

    // flush caches: the reference run works on its own set of arrays
    // (arr4/arr5/arr6), not on the arrays used by the loop below.
    const std::int64_t run_time_seq = elementWiseMultiplication(N, arr4.data(), arr5.data(), arr6.data());
    // Floating-point division: the integer form truncated the ns/op figure.
    cout << "Runtime for basic version: " << run_time_seq << "us "
         << (static_cast<double>(run_time_seq) * 1000.0 / N) << "ns/op" << endl;

    // warm up
    for (int nbr_iterations = 10; nbr_iterations <= MAX_NBR_ITERATIONS; nbr_iterations += 10) {
        const std::int64_t run_time = elementWiseMultiplicationIterative(
            N, nbr_iterations, arr.data(), arr2.data(), result_par.data());
        cout << "Runtime for " << nbr_iterations << " iterations: " << run_time << "us "
             << (static_cast<double>(run_time) * 1000.0 / N / nbr_iterations)
             << "ns/op - Slowdown = "
             << (static_cast<double>(run_time) / static_cast<double>(run_time_seq)) << endl;
    }

    // NOTE(review): the original text of main() was truncated right after the
    // "Slowdown" output. The end of the loop and of main() were reconstructed
    // minimally; any further steps the original performed here are unknown.
    return 0;
}

/**
 * Computes result[i] = arr[i] * arr2[i] for i in [0, n) and measures how long
 * the loop takes.
 *
 * @param n      number of elements to process; nothing is done when n <= 0
 * @param arr    first input array (at least n elements)
 * @param arr2   second input array (at least n elements)
 * @param result output array (at least n elements)
 * @return elapsed time of the loop in microseconds
 */
std::int64_t elementWiseMultiplication(int n, float* arr, float* arr2, float* result) {
    // steady_clock is monotonic, so the interval cannot be distorted by
    // wall-clock adjustments happening during the measurement.
    const steady_clock::time_point start = steady_clock::now();
    for (int i = 0; i < n; ++i) {
        result[i] = arr[i] * arr2[i];
    }
    const steady_clock::time_point stop = steady_clock::now();
    return duration_cast<microseconds>(stop - start).count();
}

/**
 * Computes result[i] = arr[i] * arr2[i]^nbrIterations for i in [0, n) by
 * repeated multiplication and measures how long the loops take.
 *
 * @param n             number of elements to process; nothing is done when n <= 0
 * @param nbrIterations number of multiplications by arr2[i] applied to each
 *                      element; with nbrIterations <= 0, result[i] = arr[i]
 * @param arr           first input array (at least n elements)
 * @param arr2          second input array (at least n elements)
 * @param result        output array (at least n elements)
 * @return elapsed time of the loops in microseconds
 */
std::int64_t elementWiseMultiplicationIterative(int n, int nbrIterations, float* arr, float* arr2, float* result) {
    const steady_clock::time_point start = steady_clock::now();
    for (int i = 0; i < n; ++i) {
        float x = arr[i];
        for (int it = 0; it < nbrIterations; it++) {
            x = x * arr2[i];
        }
        result[i] = x;
    }
    const steady_clock::time_point stop = steady_clock::now();
    return duration_cast<microseconds>(stop - start).count();
}

/**
 * Compares arr4 against the reference arr3 element by element, using a
 * relative tolerance of 0.0001, and reports the number of mismatches on cout.
 * Every element of arr4 is reset to 0 afterwards so the buffer can be reused.
 *
 * @param name       label of the algorithm under test (currently not printed)
 * @param arr3       reference results (at least n elements)
 * @param arr4       results to verify (at least n elements); zeroed on return
 * @param n          number of elements to compare
 * @param PRINT_DATA when true, prints the first 10 element pairs with their
 *                   relative difference, followed by a newline
 */
void checkIfResultsAreTheSame(string name, float* arr3, float* arr4, int n, bool PRINT_DATA) {
    (void)name; // see the note at the end of this function
    int nbr_diff = 0;
    for (int i = 0; i < n; i++) {
        // Two NaNs never compare equal, but they are the same outcome here.
        const bool both_nan = std::isnan(arr3[i]) && std::isnan(arr4[i]);
        float rel_diff = 0;
        if (!both_nan && arr3[i] != arr4[i]) {
            // Relative to the reference value, or to arr4[i] when the
            // reference is exactly 0 (avoids dividing by zero).
            rel_diff = (arr3[i] - arr4[i]) / (arr3[i] == 0 ? arr4[i] : arr3[i]);
        }
        // Compare the magnitude: a signed test misses every element where the
        // difference is negative. The negated "<=" also classifies a NaN
        // difference (e.g. infinity vs. a finite value) as a mismatch.
        const bool differs = !(std::fabs(rel_diff) <= 0.0001);
        if (differs) {
            nbr_diff++;
            if (PRINT_DATA && i < 10) cout << arr3[i] << "<>" << arr4[i] << " (" << rel_diff << "), ";
        }
        else {
            if (PRINT_DATA && i < 10) cout << arr3[i] << "==" << arr4[i] << " (" << rel_diff << "), ";
        }
        arr4[i] = 0; // we reset arr4 for further usage
    }
    if (PRINT_DATA) cout << endl;
    if (nbr_diff > 0)
        cout << "Problem with algorithm : " << nbr_diff << " differences with Original Algorithm." << endl;
    // NOTE: the original source carried the fragment  " << name << "  after
    // this function, presumably meant to insert the algorithm name into the
    // message above; the message text is left unchanged here.
}