Click on the expand icon to show the content for each version, and click on the collapse icon to hide it.
export PATH=<yourpath>/src/bin:$PATH
Replace <yourpath> by the path where the metafork/ folder is.
export PATH=<yourpath>/src/bin:$PATH
Replace <yourpath> by the path where the metafork/ folder is.
export PATH=<yourpath>/MetaFork_version_1.0/build/bin:$PATH
export C_INCLUDE_PATH=<yourpath>/MetaFork_version_1.0/tools/clang/include/clang/AST/:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=<yourpath>/MetaFork_version_1.0/tools/clang/include/clang/AST/:$CPLUS_INCLUDE_PATH
Replace <yourpath> by the path where the MetaFork_version_1.0/ folder is.
export CLANG_ROOT=<yourpath>
# Put the Clang/LLVM tree rooted at $CLANG_ROOT in front of the executable,
# run-time loader, link-time library, and C/C++ header search paths.
# Requires: CLANG_ROOT must already be set to the Clang/LLVM directory.
#
# ${VAR:+:${VAR}} appends ":<old value>" only when VAR is already non-empty.
# A plain "dir:$VAR" leaves a trailing ':' when VAR is unset, and an empty
# entry in these search paths is treated as the current working directory.
export PATH="${CLANG_ROOT}/bin${PATH:+:${PATH}}"
export LD_LIBRARY_PATH="${CLANG_ROOT}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export LIBRARY_PATH="${CLANG_ROOT}/lib${LIBRARY_PATH:+:${LIBRARY_PATH}}"
export C_INCLUDE_PATH="${CLANG_ROOT}/include/${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"
export CPLUS_INCLUDE_PATH="${CLANG_ROOT}/include/${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}"
Replace <yourpath> by the path where the clang+llvm-3.6.2-x86_64-linux-gnu-ubuntu-14.04/ folder is.
export PATH=<yourpath>/install/bin:$PATH
export C_INCLUDE_PATH=<yourpath>/install/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=<yourpath>/install/include:$CPLUS_INCLUDE_PATH
Replace <yourpath> by the path where the distrib folder is.
In your .bashrc file, add the following lines:
# MetaFork-PPCG environment setup.
# METAFORKPPCG_ROOT is a placeholder value: replace it with the real path of
# the metafork-ppcg directory before use.
#
# ${VAR:+:${VAR}} appends ":<old value>" only when VAR is already non-empty.
# A plain "dir:$VAR" leaves a trailing ':' when VAR is unset, and an empty
# entry in these search paths is treated as the current working directory.
export METAFORKPPCG_ROOT=/your/metafork-ppcg/path

# Executables built under .libs.
export PATH="${METAFORKPPCG_ROOT}/.libs${PATH:+:${PATH}}"

# isl libraries: link-time (LIBRARY_PATH) and run-time (LD_LIBRARY_PATH).
export LIBRARY_PATH="${METAFORKPPCG_ROOT}/isl/.libs/${LIBRARY_PATH:+:${LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${METAFORKPPCG_ROOT}/isl/.libs/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# pet libraries; prepended after isl, so pet ends up first in the search order.
export LIBRARY_PATH="${METAFORKPPCG_ROOT}/pet/.libs/${LIBRARY_PATH:+:${LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${METAFORKPPCG_ROOT}/pet/.libs/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# pet headers for the C++ and C compilers.
export CPLUS_INCLUDE_PATH="${METAFORKPPCG_ROOT}/pet/include/${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}"
export C_INCLUDE_PATH="${METAFORKPPCG_ROOT}/pet/include/${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"
// Plain C loop nest: T sweeps of a three-point averaging stencil over a.
// Only indices 1..N-2 are written; a[0] and a[N-1] are read but never updated.
for (int t = 0; t < T; ++t) {
  // Store the average of each three consecutive elements of a into b[1..N-2].
  for (int i = 0; i < N-2; ++i)
    b[i+1] = (a[i] + a[i+1] + a[i+2]) / 3;
  // Copy b[1..N-2] back into a so the next sweep reads the updated values.
  for (int i = 0; i < N-2; ++i)
    a[i+1] = b[i+1];
}
MetaFork code:
// N is the input array size, which is 2^k+2, k is a positive integer // B is the number of threads per thread-block int ub_v = (N - 2) / B; meta_schedule { for (int t = 0; t < T; ++t) { meta_for (int v = 0; v < ub_v; v++) meta_for (int u = 0; u < B; u++) { int p = v * B + u; b[p+1] = (a[p] + a[p+1] + a[p+2]) / 3; } meta_for (int v = 0; v < ub_v; v++) meta_for (int u = 0; u < B; u++) { int w = v * B + u; a[w+1] = b[w+1]; } } }
// Averaging kernel reading a and writing b directly through the pointer
// arguments. For each chunk index c1 (starting at blockIdx.x, stepping by
// 32768), thread t0 handles element p = c1*B + t0 and writes
// b[p+1] = (a[p] + a[p+1] + a[p+2]) / 3 using int arithmetic.
// N, T and c0 are accepted but not referenced in the body.
__global__ void kernel0(int *a, int *b, int N, int T, int ub_v, int B, int c0)
{
  int b0 = blockIdx.x;
  int t0 = threadIdx.x;
  int private_p;

  // floord is defined here but not used in this kernel.
  #define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
  // NOTE(review): the stride 32768 is presumably the number of thread-blocks
  // launched - confirm against the host-side launch code.
  for (int c1 = b0; c1 < ub_v; c1 += 32768) {
    private_p = (((c1) * (B)) + (t0));
    b[private_p + 1] = (((a[private_p] + a[private_p + 1]) + a[private_p + 2]) / 3);
    // Block-wide barrier at the end of every chunk iteration.
    __syncthreads();
  }
}
// Copy-back kernel: for each chunk index c1 (starting at blockIdx.x, stepping
// by 32768), thread t0 handles element w = c1*B + t0 and copies b[w+1] into
// a[w+1]. N, T and c0 are accepted but not referenced in the body.
__global__ void kernel1(int *a, int *b, int N, int T, int ub_v, int B, int c0)
{
  int b0 = blockIdx.x;
  int t0 = threadIdx.x;
  int private_w;

  // floord is defined here but not used in this kernel.
  #define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
  for (int c1 = b0; c1 < ub_v; c1 += 32768) {
    private_w = (((c1) * (B)) + (t0));
    a[private_w + 1] = b[private_w + 1];
    // Block-wide barrier at the end of every chunk iteration.
    __syncthreads();
  }
}
- jacobi.c
// Averaging kernel that stages its input in __shared__ memory. For each chunk
// index c1 (starting at blockIdx.x, stepping by 32768) the block copies up to
// B+2 consecutive elements of a, starting at a[B*c1], into shared_a; thread t0
// then writes b[p+1], p = c1*B + t0, from three neighbouring shared_a entries.
// T and c0 are accepted but not referenced in the body.
__global__ void kernel0(int *a, int *b, int N, int T, int ub_v, int B, int c0)
{
  int b0 = blockIdx.x;
  int t0 = threadIdx.x;
  int private_p;
  // NOTE(review): BLOCK_0 is defined outside this listing; the indexing below
  // needs BLOCK_0 >= B - confirm where it is defined.
  __shared__ int shared_a[BLOCK_0+2];

  // floord is defined here but not used in this kernel.
  #define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
  #define min(x,y) ((x) < (y) ? (x) : (y))
  for (int c1 = b0; c1 < ub_v; c1 += 32768) {
    // Cooperative load: thread t0 copies tile slots t0, t0+B, ... up to slot
    // B+1, clipped so the source index B*c1 + c2 never exceeds N-1.
    for (int c2 = t0; c2 <= min(B + 1, N - B * c1 - 1); c2 += B)
      shared_a[c2] = a[B * c1 + c2];
    // Barrier: the whole tile must be loaded before any thread reads it.
    __syncthreads();
    private_p = (((c1) * (B)) + (t0));
    // private_p - B*c1 equals t0, so this reads shared_a[t0 .. t0+2].
    b[private_p + 1] = (((shared_a[private_p - B * c1] + shared_a[private_p - B * c1 + 1]) + shared_a[private_p - B * c1 + 2]) / 3);
    // Barrier: all reads of the tile finish before the next iteration reloads it.
    __syncthreads();
  }
}
// Copy-back kernel that stages b in __shared__ memory. For each chunk index c1
// (starting at blockIdx.x, stepping by 32768) thread t0 loads b[t0 + B*c1 + 1]
// into shared_b[t0] and then stores it to a[w+1], w = c1*B + t0.
// T and c0 are accepted but not referenced in the body.
__global__ void kernel1(int *a, int *b, int N, int T, int ub_v, int B, int c0)
{
  int b0 = blockIdx.x;
  int t0 = threadIdx.x;
  int private_w;
  // NOTE(review): BLOCK_0 is defined outside this listing; the indexing below
  // needs BLOCK_0 >= B - confirm where it is defined.
  __shared__ int shared_b[BLOCK_0];

  // floord is defined here but not used in this kernel.
  #define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
  for (int c1 = b0; c1 < ub_v; c1 += 32768) {
    // Guard keeps the source index t0 + B*c1 + 1 at or below N-1.
    if (N >= t0 + B * c1 + 2)
      shared_b[t0] = b[t0 + B * c1 + 1];
    // Barrier between filling shared_b and reading it.
    __syncthreads();
    private_w = (((c1) * (B)) + (t0));
    // private_w - B*c1 equals t0, so each thread reads back its own slot.
    a[private_w + 1] = shared_b[private_w - B * c1];
    // Barrier: reads finish before the next iteration overwrites shared_b.
    __syncthreads();
  }
}
- jacobi.c