Skip to content
Snippets Groups Projects
Commit f24faeec authored by gkusoglu's avatar gkusoglu
Browse files

Trying to optimize 2mm

parent 75c21a6f
Branches
No related merge requests found
...@@ -87,23 +87,22 @@ void kernel_2mm(int ni, int nj, int nk, int nl, ...@@ -87,23 +87,22 @@ void kernel_2mm(int ni, int nj, int nk, int nl,
#pragma scop #pragma scop
/* D := alpha*A*B*C + beta*D */ /* D := alpha*A*B*C + beta*D */
for (i = 0; i < _PB_NI; i++) for (i = 0; i < _PB_NI; i++)
{
for (j = 0; j < _PB_NJ; j++) for (j = 0; j < _PB_NJ; j++)
{
tmp[i][j] = SCALAR_VAL(0.0); tmp[i][j] = SCALAR_VAL(0.0);
for (i = 0; i < _PB_NI; i++)
for (j = 0; j < _PB_NJ; j++)
for (k = 0; k < _PB_NK; ++k) for (k = 0; k < _PB_NK; ++k)
tmp[i][j] += alpha * A[i][k] * B[k][j]; tmp[i][j] += alpha * A[i][k] * B[k][j];
}
}
for (i = 0; i < _PB_NI; i++) for (i = 0; i < _PB_NI; i++)
{
for (j = 0; j < _PB_NL; j++) for (j = 0; j < _PB_NL; j++)
{
D[i][j] *= beta; D[i][j] *= beta;
for (i = 0; i < _PB_NI; i++)
for (j = 0; j < _PB_NL; j++)
for (k = 0; k < _PB_NJ; ++k) for (k = 0; k < _PB_NJ; ++k)
D[i][j] += tmp[i][k] * C[k][j]; D[i][j] += tmp[i][k] * C[k][j];
}
}
#pragma endscop #pragma endscop
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment