diff options
author | dhuth <derickhuth@gmail.com> | 2014-08-27 09:52:06 -0600 |
---|---|---|
committer | dhuth <derickhuth@gmail.com> | 2014-08-27 09:52:06 -0600 |
commit | bff810cc371a38f493d688c54f71013f5a7d53bf (patch) | |
tree | fbe86954bb3c01deb21da9e41ebff5baa2889a45 /examples/fortran/rose_gemm.f90 | |
download | chill-bff810cc371a38f493d688c54f71013f5a7d53bf.tar.gz chill-bff810cc371a38f493d688c54f71013f5a7d53bf.tar.bz2 chill-bff810cc371a38f493d688c54f71013f5a7d53bf.zip |
Initial commit
Diffstat (limited to 'examples/fortran/rose_gemm.f90')
-rw-r--r-- | examples/fortran/rose_gemm.f90 | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/examples/fortran/rose_gemm.f90 b/examples/fortran/rose_gemm.f90 new file mode 100644 index 0000000..d150922 --- /dev/null +++ b/examples/fortran/rose_gemm.f90 @@ -0,0 +1,155 @@ +PROGRAM matmul +INTEGER :: N, i, j, k +REAL(kind=8) :: a(10,10), b(10,10), c(10,10), ct(10,10), mysum +DO i = 1, 10, 1 +DO j = 1, 10, 1 +a(i,j) = i + j +b(i,j) = i - j +c(i,j) = 0.0 +ct(i,j) = 0.0 +END DO +b(i,i) = 1.0 +END DO +DO j = 1, 10, 1 +DO k = 1, 10, 1 +DO i = 1, 10, 1 +c(i,j) = c(i,j) + a(i,k) * b(k,j) +END DO +END DO +END DO +CALL gemm(10,a,b,ct) +mysum = 0.0 +DO i = 1, 10, 1 +DO j = 1, 10, 1 +mysum = c(i,j) - ct(i,j) +END DO +END DO +IF (abs(mysum) >= 0.00001) THEN +WRITE (*, FMT=*) "Something wrong" +ELSE +WRITE (*, FMT=*) "Output matches" +END IF +END PROGRAM matmul + +SUBROUTINE gemm(N,A,B,C) +INTEGER :: t12 +INTEGER :: t10 +INTEGER :: t8 +INTEGER :: t6 +INTEGER :: t4 +INTEGER :: t2 +INTEGER :: chill_t64 +INTEGER :: chill_t63 +INTEGER :: chill_t62 +INTEGER :: chill_t61 +INTEGER :: chill_t60 +INTEGER :: chill_t59 +INTEGER :: chill_t58 +INTEGER :: chill_t57 +INTEGER :: chill_t56 +INTEGER :: chill_t55 +INTEGER :: chill_t54 +INTEGER :: chill_t53 +INTEGER :: chill_t52 +INTEGER :: chill_t51 +INTEGER :: chill_t50 +INTEGER :: chill_t49 +INTEGER :: chill_t48 +INTEGER :: chill_t47 +INTEGER :: over2 +INTEGER :: chill_t46 +INTEGER :: chill_t45 +INTEGER :: chill_t44 +INTEGER :: chill_t43 +INTEGER :: chill_t42 +INTEGER :: chill_t41 +INTEGER :: chill_t40 +INTEGER :: chill_t39 +INTEGER :: chill_t38 +INTEGER :: chill_t37 +INTEGER :: chill_t36 +INTEGER :: chill_t35 +INTEGER :: chill_t34 +INTEGER :: chill_t33 +INTEGER :: chill_t32 +INTEGER :: chill_t31 +INTEGER :: chill_t30 +INTEGER :: chill_t29 +INTEGER :: chill_t28 +INTEGER :: chill_t27 +INTEGER :: chill_t26 +INTEGER :: chill_t25 +INTEGER :: chill_t24 +INTEGER :: chill_t23 +INTEGER :: over1 +INTEGER :: chill_t22 +INTEGER :: chill_t21 +INTEGER :: chill_t20 +INTEGER :: chill_t19 +INTEGER :: chill_t18 +INTEGER :: chill_t17 +INTEGER :: chill_t16 +INTEGER :: chill_t15 +REAL(kind=8), DIMENSION(8,512) :: f_P2 +INTEGER :: chill_t14 +INTEGER :: chill_t13 +INTEGER :: chill_t12 +INTEGER :: chill_t11 +INTEGER :: chill_t10 +INTEGER :: chill_t9 +INTEGER :: chill_t8 +INTEGER :: chill_t7 +REAL(kind=8), DIMENSION(512,128) :: f_P1 +INTEGER :: chill_t1 +INTEGER :: chill_t2 +INTEGER :: chill_t4 +INTEGER :: chill_t6 +INTEGER :: chill_t5 +INTEGER :: N +REAL(kind=8) :: A(N,N), B(N,N), C(N,N) +INTEGER :: I, J, K +over1 = 0 +over2 = 0 +DO t2 = 1, N, 512 +DO t4 = 1, N, 128 +DO t6 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +DO t8 = t4, merge(t4 + 127,N,t4 + 127 <= N), 1 +f_P1(t8 - t4 + 1,t6 - t2 + 1) = A(t8,t6) +END DO +END DO +DO t6 = 1, N, 8 +DO t8 = t6, merge(N,t6 + 7,N <= t6 + 7), 1 +DO t10 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +f_P2(t10 - t2 + 1,t8 - t6 + 1) = B(t10,t8) +END DO +END DO +over1 = MOD(N,2) +DO t8 = t4, merge(-over1 + N,t4 + 126,-over1 + N <= t4 + 126), 2 +over2 = MOD(N,2) +DO t10 = t6, merge(t6 + 6,N - over2,t6 + 6 <= N - over2), 2 +DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 +C(t8,t10) = C(t8,t10) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +C(t8 + 1,t10) = C(t8 + 1,t10) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +C(t8,t10 + 1) = C(t8,t10 + 1) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) +C(t8 + 1,t10 + 1) = C(t8 + 1,t10 + 1) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) +END DO +END DO +IF (N - 7 <= t6 .AND. 1 <= over2) THEN +DO t12 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +C(t8,N) = C(t8,N) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) +C(t8 + 1,N) = C(t8 + 1,N) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) +END DO +END IF +END DO +IF (N - 127 <= t4 .AND. 1 <= over1) THEN +DO t10 = t6, merge(t6 + 7,N,t6 + 7 <= N), 1 +DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 +C(N,t10) = C(N,t10) + f_P1(N - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +END DO +END DO +END IF +END DO +END DO +END DO +END SUBROUTINE + |