/*
 * Code to test an algorithm and measure its performance.
 * Author: Jan Lemeire, 2020
 * http://parallel.vub.ac.be/education/ppp/practica/PPP_practica.html
 */

// NOTE(review): the header names of the original #include lines were lost. The list
// below restores the standard headers this file actually uses. The original also
// listed a header "for vector instructions" (presumably <immintrin.h>) and
// commented-out headers for multithreading and OpenMP; re-add them when those
// versions are implemented.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

constexpr int N = 1024 * 1024;  // number of elements; the original hints at "* 16" for larger runs
constexpr int ITERATIONS = 16;  // generator steps applied to every element
using TYPE = int;               // element type; selects the kernel overload called from main()

bool PRINT_DATA = true;  // set this to true to print input & output data. Use this to debug

std::int64_t pseudoRandomGenerator(int n, int* arr, int* arr2, int* result);
std::int64_t pseudoRandomGenerator(int n, float* arr, float* arr2, float* result);
bool checkIfResultsAreTheSame(std::string name, float* arr3, float* arr4, int n);

namespace {

// Prints at most the first 10 input/output pairs, one "in => out" pair per line.
template <typename T>
void printSample(int n, const T* in, const T* out)
{
    std::cout << "Output: " << std::endl;
    const int shown = std::min(n, 10);  // never read past the end of a short buffer
    for (int i = 0; i < shown; ++i)
        std::cout << in[i] << " => " << out[i] << std::endl;
}

}  // namespace

/**
 * Benchmark driver: fills the input arrays with rand() values, runs the kernel once
 * as warm-up and once measured, then prints the time per operation and the throughput.
 *
 * @return -1 when N is not a multiple of 16, 0 otherwise.
 */
int main(int argc, char* argv[])
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const std::int64_t nbrOfOperationPerIteration = 3;  // one add, one multiply, one subtract per step
    // Widen before multiplying so that a larger N cannot overflow int.
    const std::int64_t totalNbrOfOperations = std::int64_t{N} * ITERATIONS * nbrOfOperationPerIteration;
    // presumably one 4-byte read plus one 4-byte write per element -- TODO confirm
    const std::int64_t totalNbrOfBytes = std::int64_t{N} * 8;

    // NOTE(review): the beginning of this banner was lost in the source text; only the part
    // from "=> <operations> operations and ..." onwards is original. The leading fields are
    // reconstructed from the commented-out banner of an earlier version of this program.
    std::cout << "Vector size N = " << N << " ITERATIONS = " << ITERATIONS << " PRINT_DATA = " << PRINT_DATA
              << " => " << totalNbrOfOperations << " operations and " << totalNbrOfBytes
              << " bytes transferred" << std::endl;

    if (N % 16 != 0)
    {
        std::cout << "N (" << N << ") should be a multiple of 16 (for the multithreaded versions)!" << std::endl;
        return -1;
    }

    std::srand(1);  // constant seed. for random initial seed: use time(NULL)

    // allocation and initialization; std::vector zero-initialises and frees the buffers itself
    std::vector<TYPE> arr(N);   // input
    std::vector<TYPE> arr2(N);  // second input (filled, but not read by the current kernels)
    std::vector<TYPE> arr3(N);  // output of the reference version
    std::vector<TYPE> arr4(N);  // presumably the output buffer for alternative versions; unused for now
    for (int i = 0; i < N; i++)
    {
        // rand() returns a value in [0, RAND_MAX]; RAND_MAX is implementation-defined (at least 32767)
        arr[i] = static_cast<TYPE>(std::rand());
        arr2[i] = static_cast<TYPE>(std::rand());
        if (PRINT_DATA && i < 10)
            std::cout << arr[i] << "*" << arr2[i] << ", ";
    }
    if (PRINT_DATA)
        std::cout << std::endl;

    pseudoRandomGenerator(N, arr.data(), arr2.data(), arr3.data());  // warm up
    const std::int64_t seq_run_time =
        pseudoRandomGenerator(N, arr.data(), arr2.data(), arr3.data());  // returns time in microseconds

    // us * 1000 / ops = ns per operation;  ops / 1000 / us = operations per ns = GOps
    std::cout << "Reference time = " << seq_run_time << "us => time per operation = " << std::setprecision(3)
              << (1000.0 * seq_run_time) / totalNbrOfOperations << "ns => "
              << totalNbrOfOperations / 1000.0 / seq_run_time << "GOps" << std::endl;

    std::cout << std::endl << "Press ENTER to close window...";
    std::cin.get();
    return 0;
}

// ==== ALL VERSIONS ====

/**
 * Integer kernel: applies x = (x + 29) * 927 - 10323 ITERATIONS times to every seed.
 *
 * @param n       number of elements in arr and result
 * @param arr     seeds (read only)
 * @param arr2    accepted but not read by this kernel
 * @param result  receives the final value for every seed
 * @return        elapsed time of the computation loop in microseconds (printing excluded)
 */
std::int64_t pseudoRandomGenerator(int n, int* arr, int* arr2, int* result)
{
    (void)arr2;
    const auto start = std::chrono::steady_clock::now();  // monotonic clock: immune to wall-clock adjustments

    // The recurrence overflows 32 bits after a few steps. Unsigned arithmetic wraps by
    // definition, whereas the signed overflow of a plain int would be undefined behaviour.
    const unsigned constantA = 29, constantB = 927, constantD = 10323;
    for (int i = 0; i < n; ++i)
    {
        unsigned x = static_cast<unsigned>(arr[i]);  // seed
        for (int it = 0; it < ITERATIONS; ++it)
            x = (x + constantA) * constantB - constantD;
        // The original also tried ((x + A) * B - C) % D with C = 1013: modulo is very slow!
        result[i] = static_cast<int>(x);  // modular conversion on all mainstream compilers (guaranteed since C++20)
    }

    const auto us =
        std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start);

    if (PRINT_DATA)
        printSample(n, arr, result);
    return us.count();
}

/**
 * Float kernel: applies x = (x + 29.32) * 0.9271 - 103.23 ITERATIONS times to every seed.
 *
 * @param n       number of elements in arr and result
 * @param arr     seeds (read only)
 * @param arr2    accepted but not read by this kernel
 * @param result  receives the final value for every seed
 * @return        elapsed time of the computation loop in microseconds (printing excluded)
 */
std::int64_t pseudoRandomGenerator(int n, float* arr, float* arr2, float* result)
{
    (void)arr2;
    const auto start = std::chrono::steady_clock::now();

    const float constantA = 29.32f, constantB = 0.9271f, constantD = 103.23f;
    for (int i = 0; i < n; ++i)
    {
        float x = arr[i];  // seed
        for (int it = 0; it < ITERATIONS; ++it)
            x = ((x + constantA) * constantB - constantD);
        result[i] = x;
    }

    const auto us =
        std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start);

    if (PRINT_DATA)
    {
        std::cout << std::setprecision(3);  // sticky: applies to every float printed below
        printSample(n, arr, result);
    }
    return us.count();
}

// *** utility functions ***

/**
 * Compares the output of two versions and expects no significant differences.
 * Elements whose relative difference exceeds 1% in magnitude count as different.
 *
 * @param name  name of the algorithm under test, used in the problem report
 * @param arr3  reference output
 * @param arr4  output to verify; every element is reset to 0 afterwards for further usage
 * @param n     number of elements in both arrays
 * @return      true when no element differs significantly
 */
bool checkIfResultsAreTheSame(std::string name, float* arr3, float* arr4, int n)
{
    int nbr_diff = 0;
    for (int i = 0; i < n; i++)
    {
        // Relative to the reference value; falls back to the other value when the reference is 0
        // (that value is then non-zero because the two differ).
        const float rel_diff =
            arr3[i] == arr4[i] ? 0 : (arr3[i] - arr4[i]) / (arr3[i] == 0 ? arr4[i] : arr3[i]);
        // Compare the magnitude: a negative deviation is just as wrong as a positive one.
        if (std::fabs(rel_diff) > 0.01)
        {
            nbr_diff++;
            if (PRINT_DATA && nbr_diff < 10)
                std::cout << arr3[i] << "<>" << arr4[i] << " (" << rel_diff << "), ";
        }
        else
        {
            if (PRINT_DATA && i < 10)
                std::cout << arr3[i] << "==" << arr4[i] << " (" << rel_diff << "), ";
        }
        arr4[i] = 0;  // we reset arr4 for further usage
    }
    if (PRINT_DATA)
        std::cout << std::endl;
    if (nbr_diff > 0)
        std::cout << "Problem with algorithm " << name << ": " << nbr_diff
                  << " differences with Original Algorithm." << std::endl;
    return nbr_diff == 0;
}