Click on the expand icon to show the content for each version, and click on the collapse icon to hide it.
export PATH=<yourpath>/src/bin:$PATH
Replace <yourpath> with the path where the metafork/ folder is.
export PATH=<yourpath>/src/bin:$PATH
Replace <yourpath> with the path where the metafork/ folder is.
export PATH=<yourpath>/MetaFork_version_1.0/build/bin:$PATH
export C_INCLUDE_PATH=<yourpath>/MetaFork_version_1.0/tools/clang/include/clang/AST/:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=<yourpath>/MetaFork_version_1.0/tools/clang/include/clang/AST/:$CPLUS_INCLUDE_PATH
Replace <yourpath> with the path where the MetaFork_version_1.0/ folder is.
export CLANG_ROOT=<yourpath>
export PATH=$CLANG_ROOT/bin:$PATH
export LD_LIBRARY_PATH=$CLANG_ROOT/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$CLANG_ROOT/lib:$LIBRARY_PATH
export C_INCLUDE_PATH=$CLANG_ROOT/include/:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$CLANG_ROOT/include/:$CPLUS_INCLUDE_PATH
Replace <yourpath> with the path where the clang+llvm-3.6.2-x86_64-linux-gnu-ubuntu-14.04/ folder is.
export PATH=<yourpath>/install/bin:$PATH
export C_INCLUDE_PATH=<yourpath>/install/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=<yourpath>/install/include:$CPLUS_INCLUDE_PATH
Replace <yourpath> with the path where the distrib folder is.
In your .bashrc file, add the following lines:
export METAFORKPPCG_ROOT=/your/metafork-ppcg/path
export PATH=$METAFORKPPCG_ROOT/.libs:$PATH
export LIBRARY_PATH=$METAFORKPPCG_ROOT/isl/.libs/:$LIBRARY_PATH
export LD_LIBRARY_PATH=$METAFORKPPCG_ROOT/isl/.libs/:$LD_LIBRARY_PATH
export LIBRARY_PATH=$METAFORKPPCG_ROOT/pet/.libs/:$LIBRARY_PATH
export LD_LIBRARY_PATH=$METAFORKPPCG_ROOT/pet/.libs/:$LD_LIBRARY_PATH
export CPLUS_INCLUDE_PATH=$METAFORKPPCG_ROOT/pet/include/:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=$METAFORKPPCG_ROOT/pet/include/:$C_INCLUDE_PATH
// Serial three-point stencil, repeated for T time steps.
// a, b, N and T are declared outside this fragment.
for (int t = 0; t < T; ++t) {
// Step 1: for every index 1 .. N-2, store in b the sum of the element of a
// at that index and its two neighbours, divided by 3.
// NOTE(review): the division truncates if a and b hold integers; the element
// type is not visible in this fragment -- confirm.
for (int i = 0; i < N-2; ++i)
b[i+1] = (a[i] + a[i+1] + a[i+2]) / 3;
// Step 2: copy the freshly computed values back into a so that the next
// time step reads them. a[0] and a[N-1] are never written by this loop nest.
for (int i = 0; i < N-2; ++i)
a[i+1] = b[i+1];
}
MetaFork code:
// N is the input array size, which is 2^k+2, k is a positive integer
// B is the number of threads per thread-block
// ub_v is the number of B-sized chunks into which the N-2 updated elements
// (indices 1 .. N-2) are split; the division is exact only when B divides N-2.
int ub_v = (N - 2) / B;
// NOTE(review): meta_schedule and meta_for are MetaFork constructs whose exact
// semantics are not shown here; the outer/inner meta_for pair presumably maps
// to thread-blocks (v) and threads within a block (u) -- confirm against the
// MetaFork documentation.
meta_schedule {
// The time-step loop is an ordinary for loop.
for (int t = 0; t < T; ++t) {
// Step 1: compute b from a. The pair (v, u) is flattened into the single
// index p = v * B + u, which ranges over 0 .. ub_v*B - 1.
meta_for (int v = 0; v < ub_v; v++)
meta_for (int u = 0; u < B; u++) {
int p = v * B + u;
b[p+1] = (a[p] + a[p+1] + a[p+2]) / 3;
}
// Step 2: copy b back into a, using the same (v, u) -> w flattening.
meta_for (int v = 0; v < ub_v; v++)
meta_for (int u = 0; u < B; u++) {
int w = v * B + u;
a[w+1] = b[w+1];
}
}
}
// kernel0: stencil step. Each thread writes b[private_p + 1] as the sum of
// a[private_p], a[private_p + 1] and a[private_p + 2], integer-divided by 3.
//
// Parameters:
//   a     - input array (read only in this kernel)
//   b     - output array
//   ub_v  - exclusive upper bound of the block-level loop variable c1
//   B     - multiplier applied to c1 when forming the element index
//   N, T, c0 - accepted but not referenced in this body
// NOTE(review): c0 is presumably the time-step index supplied by the host
// code -- confirm against the launch site.
__global__ void kernel0(int *a, int *b, int N, int T, int ub_v,
int B, int c0) {
int b0 = blockIdx.x;
int t0 = threadIdx.x;
int private_p;
// floord is defined here but not used in this body.
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
// Each block handles c1 = b0, b0 + 32768, b0 + 2*32768, ... while c1 < ub_v.
// NOTE(review): 32768 presumably equals the number of blocks launched --
// confirm against the host code.
for (int c1 = b0; c1 < ub_v; c1 += 32768) {
// Element index handled by this thread in this iteration.
private_p = (((c1) * (B)) + (t0));
b[private_p + 1] = (((a[private_p] + a[private_p + 1])
+ a[private_p + 2]) / 3);
// Barrier across the threads of the block. The loop bounds depend only on
// blockIdx.x and kernel arguments, so all threads of a block reach it the
// same number of times.
__syncthreads();
}
}
// kernel1: copy-back step. Each thread copies b[private_w + 1] into
// a[private_w + 1].
//
// Parameters:
//   a     - destination array
//   b     - source array (read only in this kernel)
//   ub_v  - exclusive upper bound of the block-level loop variable c1
//   B     - multiplier applied to c1 when forming the element index
//   N, T, c0 - accepted but not referenced in this body
__global__ void kernel1(int *a, int *b, int N, int T, int ub_v,
int B, int c0) {
int b0 = blockIdx.x;
int t0 = threadIdx.x;
int private_w;
// floord is defined here but not used in this body.
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
// Each block handles c1 = b0, b0 + 32768, b0 + 2*32768, ... while c1 < ub_v.
// NOTE(review): 32768 presumably equals the number of blocks launched --
// confirm against the host code.
for (int c1 = b0; c1 < ub_v; c1 += 32768) {
// Element index handled by this thread in this iteration.
private_w = (((c1) * (B)) + (t0));
a[private_w + 1] = b[private_w + 1];
// Barrier across the threads of the block; reached uniformly because the
// loop bounds do not depend on threadIdx.
__syncthreads();
}
}
- jacobi.c
// kernel0 (shared-memory variant): stencil step. For each c1, the threads
// first copy a window of a into shared_a, then each thread writes
// b[private_p + 1] from three consecutive shared_a entries, integer-divided by 3.
//
// Parameters:
//   a     - input array (read only in this kernel)
//   b     - output array
//   N     - used to clamp the upper bound of the copy into shared_a
//   ub_v  - exclusive upper bound of the block-level loop variable c1
//   B     - stride of the copy loop and multiplier applied to c1
//   T, c0 - accepted but not referenced in this body
// NOTE(review): BLOCK_0 is defined outside this listing; the copy loop writes
// shared_a indices up to B + 1, so this assumes BLOCK_0 >= B -- confirm.
__global__ void kernel0(int *a, int *b, int N, int T, int ub_v, int B, int c0) {
int b0 = blockIdx.x;
int t0 = threadIdx.x;
int private_p;
__shared__ int shared_a[BLOCK_0+2];
// floord is defined here but not used in this body; min is used below.
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
#define min(x,y) ((x) < (y) ? (x) : (y))
// Each block handles c1 = b0, b0 + 32768, b0 + 2*32768, ... while c1 < ub_v.
for (int c1 = b0; c1 < ub_v; c1 += 32768) {
// Copy a[B*c1 .. B*c1 + min(B + 1, N - B*c1 - 1)] into shared_a. Thread t0
// copies indices t0, t0 + B, ...; the min() keeps the read from a below
// index N.
for (int c2 = t0; c2 <= min(B + 1, N - B * c1 - 1); c2 += B)
shared_a[c2] = a[B * c1 + c2];
// Wait until every thread of the block has finished its copies before any
// thread reads shared_a.
__syncthreads();
private_p = (((c1) * (B)) + (t0));
// private_p - B * c1 equals t0, so this reads shared_a[t0 .. t0 + 2].
b[private_p + 1] = (((shared_a[private_p - B * c1] + shared_a[private_p - B * c1 + 1])
+ shared_a[private_p - B * c1 + 2]) / 3);
// Wait until every thread has finished reading shared_a before the next
// iteration overwrites it.
__syncthreads();
}
}
// kernel1 (shared-memory variant): copy-back step. For each c1, each thread
// copies one element of b into shared_b and then writes that shared_b entry
// to a[private_w + 1].
//
// Parameters:
//   a     - destination array
//   b     - source array (read only in this kernel)
//   N     - used to guard the read from b
//   ub_v  - exclusive upper bound of the block-level loop variable c1
//   B     - multiplier applied to c1 when forming the element index
//   T, c0 - accepted but not referenced in this body
// NOTE(review): BLOCK_0 is defined outside this listing; shared_b is indexed
// by t0, so this assumes the block has at most BLOCK_0 threads -- confirm.
__global__ void kernel1(int *a, int *b, int N, int T, int ub_v, int B, int c0) {
int b0 = blockIdx.x;
int t0 = threadIdx.x;
int private_w;
__shared__ int shared_b[BLOCK_0];
// floord is defined here but not used in this body.
#define floord(n,d) (((n)<0) ? -((-(n)+(d)-1)/(d)) : (n)/(d))
// Each block handles c1 = b0, b0 + 32768, b0 + 2*32768, ... while c1 < ub_v.
for (int c1 = b0; c1 < ub_v; c1 += 32768) {
// Guard is equivalent to t0 + B*c1 + 1 <= N - 1, i.e. the index read from b
// stays below N.
if (N >= t0 + B * c1 + 2)
shared_b[t0] = b[t0 + B * c1 + 1];
// Wait until every thread of the block has finished its copy into shared_b.
__syncthreads();
private_w = (((c1) * (B)) + (t0));
// private_w - B * c1 equals t0, so each thread reads back the entry it wrote.
a[private_w + 1] = shared_b[private_w - B * c1];
// Wait until every thread has finished reading shared_b before the next
// iteration overwrites it.
__syncthreads();
}
}
- jacobi.c