// SimulationByConvolution
//
// Simulates heat spreading along a 1D row of cells. Every cycle a heat source
// adds energy to one cell (moving from left to right), then one time step is
// computed by convolving the row with a symmetric kernel whose weights sum to 1.

#include <iostream> // cout, cin, endl
#include <iomanip>  // setprecision
#include <chrono>   // steady_clock, duration_cast, microseconds
#include <cstdlib>  // srand, rand
#include <typeinfo> // typeid
#include <vector>

using namespace std;
using namespace std::chrono;

typedef float TYPE;

namespace {

const int KERNEL_RADIUS = 5;
const int NBR_DATA = 18; // larger alternatives: 32 * 8 (* 1024)
const int CYCLES = 25;

// Print the cell row every cycle and at the end (set to false for final experiments!).
const bool PRINT_STATE = true;

// Both buffers carry KERNEL_RADIUS extra zero cells on each side so the
// convolution never needs a bounds check at the borders.
const int KERNEL_SIZE = 2 * KERNEL_RADIUS + 1;
const int PADDED_SIZE = NBR_DATA + 2 * KERNEL_RADIUS;

static_assert(KERNEL_RADIUS >= 0, "KERNEL_RADIUS must not be negative");
static_assert(NBR_DATA > 0, "NBR_DATA must be positive (it is used as a modulus)");
static_assert(CYCLES > 0, "CYCLES must be positive (it is used as a divisor)");

// Builds the symmetric kernel: the direct neighbours get weight 0.1, every
// further step away halves the weight, and the centre keeps whatever is left
// so that all weights sum to 1.
vector<TYPE> make_kernel(int radius)
{
    vector<TYPE> kernel(2 * radius + 1);
    TYPE weight = static_cast<TYPE>(0.1);
    TYPE sum = 0;
    for (int i = 1; i <= radius; i++) {
        kernel[radius - i] = weight;
        kernel[radius + i] = weight;
        sum += 2 * weight;
        weight /= 2;
    }
    kernel[radius] = 1 - sum; // energy that remains in cell
    return kernel;
}

// Prints the NBR_DATA payload cells of a padded buffer, followed by a newline.
void print_cells(const vector<TYPE>& padded)
{
    for (int i = 0; i < NBR_DATA; i++)
        cout << " " << setprecision(3) << padded[KERNEL_RADIUS + i];
    cout << endl;
}

// One time step: writes the convolution of `src` with `kernel` into the
// payload cells of `dst`. The padding cells of `dst` are left untouched (zero).
void convolve(const vector<TYPE>& kernel, const vector<TYPE>& src, vector<TYPE>& dst)
{
    for (int i = 0; i < NBR_DATA; i++) {
        TYPE acc = 0;
        for (int k = 0; k < KERNEL_SIZE; k++)
            acc += kernel[k] * src[i + k];
        dst[KERNEL_RADIUS + i] = acc;
    }
}

} // namespace

// Runs the sequential simulation, prints the cell row per cycle (if enabled),
// and reports the elapsed time. Returns 0 on completion.
int main()
{
    cout << "SimulationByConvolution with parameters: KERNEL_RADIUS = " << KERNEL_RADIUS
         << ", NBR_DATA = " << NBR_DATA << ", CYCLES = " << CYCLES
         << ", element type = " << typeid(TYPE).name() << endl;

    // ** allocation and initialization of the data **
    // vector value-initializes every element, so the padding starts out as zero.
    vector<TYPE> input(PADDED_SIZE);
    vector<TYPE> cycle_result(PADDED_SIZE);

    srand(42); // fixed seed: always the same result
    // For a random initial seed use srand(time(NULL)) instead (needs <ctime>).
    for (int i = 0; i < NBR_DATA; i++)
        input[KERNEL_RADIUS + i] = static_cast<TYPE>(rand() % 100);

    // ** KERNEL **
    const vector<TYPE> kernel = make_kernel(KERNEL_RADIUS);
    cout << "Kernel: ";
    for (int i = 0; i < KERNEL_SIZE; i++)
        cout << setprecision(2) << kernel[i] << ", ";
    cout << endl;

    // Executing sequentially - BE AWARE THAT the input buffer is changed!
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments. Note that the per-cycle printing below is
    // inside the timed region.
    const steady_clock::time_point start = steady_clock::now();

    // ** SIMULATION **
    for (int c = 0; c < CYCLES; c++) {
        // heat source (moving from left to right)
        input[KERNEL_RADIUS + c % NBR_DATA] += 200;

        if (PRINT_STATE) {
            cout << "(" << c << ") ";
            print_cells(input);
        }

        // 1 time step: convolution
        convolve(kernel, input, cycle_result);

        // The freshly computed row becomes the input of the next cycle
        // (constant-time buffer exchange, no element copies).
        input.swap(cycle_result);
    }

    const long long run_time_seq =
        duration_cast<microseconds>(steady_clock::now() - start).count();

    // print result array
    if (PRINT_STATE) {
        cout << "(END) ";
        print_cells(input);
    }

    // One "op" is a single multiply-add of the convolution, so the total
    // count includes the number of cycles as well.
    const long long total_ops = 1LL * CYCLES * NBR_DATA * KERNEL_SIZE;
    cout << "Runtime for sequential version: " << run_time_seq << "us "
         << (run_time_seq * 1000 / total_ops) << "ns/op" << endl;

    cout << endl << "Press ENTER to close window...";
    cin.get();
    return 0;
}