diff options
Diffstat (limited to 'examples/fortran/rose_gemm.f90')
-rw-r--r-- | examples/fortran/rose_gemm.f90 | 155 |
1 files changed, 0 insertions, 155 deletions
diff --git a/examples/fortran/rose_gemm.f90 b/examples/fortran/rose_gemm.f90 deleted file mode 100644 index d150922..0000000 --- a/examples/fortran/rose_gemm.f90 +++ /dev/null @@ -1,155 +0,0 @@ -PROGRAM matmul -INTEGER :: N, i, j, k -REAL(kind=8) :: a(10,10), b(10,10), c(10,10), ct(10,10), mysum -DO i = 1, 10, 1 -DO j = 1, 10, 1 -a(i,j) = i + j -b(i,j) = i - j -c(i,j) = 0.0 -ct(i,j) = 0.0 -END DO -b(i,i) = 1.0 -END DO -DO j = 1, 10, 1 -DO k = 1, 10, 1 -DO i = 1, 10, 1 -c(i,j) = c(i,j) + a(i,k) * b(k,j) -END DO -END DO -END DO -CALL gemm(10,a,b,ct) -mysum = 0.0 -DO i = 1, 10, 1 -DO j = 1, 10, 1 -mysum = c(i,j) - ct(i,j) -END DO -END DO -IF (abs(mysum) >= 0.00001) THEN -WRITE (*, FMT=*) "Something wrong" -ELSE -WRITE (*, FMT=*) "Output matches" -END IF -END PROGRAM matmul - -SUBROUTINE gemm(N,A,B,C) -INTEGER :: t12 -INTEGER :: t10 -INTEGER :: t8 -INTEGER :: t6 -INTEGER :: t4 -INTEGER :: t2 -INTEGER :: chill_t64 -INTEGER :: chill_t63 -INTEGER :: chill_t62 -INTEGER :: chill_t61 -INTEGER :: chill_t60 -INTEGER :: chill_t59 -INTEGER :: chill_t58 -INTEGER :: chill_t57 -INTEGER :: chill_t56 -INTEGER :: chill_t55 -INTEGER :: chill_t54 -INTEGER :: chill_t53 -INTEGER :: chill_t52 -INTEGER :: chill_t51 -INTEGER :: chill_t50 -INTEGER :: chill_t49 -INTEGER :: chill_t48 -INTEGER :: chill_t47 -INTEGER :: over2 -INTEGER :: chill_t46 -INTEGER :: chill_t45 -INTEGER :: chill_t44 -INTEGER :: chill_t43 -INTEGER :: chill_t42 -INTEGER :: chill_t41 -INTEGER :: chill_t40 -INTEGER :: chill_t39 -INTEGER :: chill_t38 -INTEGER :: chill_t37 -INTEGER :: chill_t36 -INTEGER :: chill_t35 -INTEGER :: chill_t34 -INTEGER :: chill_t33 -INTEGER :: chill_t32 -INTEGER :: chill_t31 -INTEGER :: chill_t30 -INTEGER :: chill_t29 -INTEGER :: chill_t28 -INTEGER :: chill_t27 -INTEGER :: chill_t26 -INTEGER :: chill_t25 -INTEGER :: chill_t24 -INTEGER :: chill_t23 -INTEGER :: over1 -INTEGER :: chill_t22 -INTEGER :: chill_t21 -INTEGER :: chill_t20 -INTEGER :: chill_t19 -INTEGER :: chill_t18 -INTEGER :: chill_t17 -INTEGER :: chill_t16 -INTEGER :: chill_t15 -REAL(kind=8), DIMENSION(8,512) :: f_P2 -INTEGER :: chill_t14 -INTEGER :: chill_t13 -INTEGER :: chill_t12 -INTEGER :: chill_t11 -INTEGER :: chill_t10 -INTEGER :: chill_t9 -INTEGER :: chill_t8 -INTEGER :: chill_t7 -REAL(kind=8), DIMENSION(512,128) :: f_P1 -INTEGER :: chill_t1 -INTEGER :: chill_t2 -INTEGER :: chill_t4 -INTEGER :: chill_t6 -INTEGER :: chill_t5 -INTEGER :: N -REAL(kind=8) :: A(N,N), B(N,N), C(N,N) -INTEGER :: I, J, K -over1 = 0 -over2 = 0 -DO t2 = 1, N, 512 -DO t4 = 1, N, 128 -DO t6 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 -DO t8 = t4, merge(t4 + 127,N,t4 + 127 <= N), 1 -f_P1(t8 - t4 + 1,t6 - t2 + 1) = A(t8,t6) -END DO -END DO -DO t6 = 1, N, 8 -DO t8 = t6, merge(N,t6 + 7,N <= t6 + 7), 1 -DO t10 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 -f_P2(t10 - t2 + 1,t8 - t6 + 1) = B(t10,t8) -END DO -END DO -over1 = MOD(N,2) -DO t8 = t4, merge(-over1 + N,t4 + 126,-over1 + N <= t4 + 126), 2 -over2 = MOD(N,2) -DO t10 = t6, merge(t6 + 6,N - over2,t6 + 6 <= N - over2), 2 -DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 -C(t8,t10) = C(t8,t10) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) -C(t8 + 1,t10) = C(t8 + 1,t10) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) -C(t8,t10 + 1) = C(t8,t10 + 1) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) -C(t8 + 1,t10 + 1) = C(t8 + 1,t10 + 1) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) -END DO -END DO -IF (N - 7 <= t6 .AND. 1 <= over2) THEN -DO t12 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 -C(t8,N) = C(t8,N) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) -C(t8 + 1,N) = C(t8 + 1,N) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) -END DO -END IF -END DO -IF (N - 127 <= t4 .AND. 1 <= over1) THEN -DO t10 = t6, merge(t6 + 7,N,t6 + 7 <= N), 1 -DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 -C(N,t10) = C(N,t10) + f_P1(N - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) -END DO -END DO -END IF -END DO -END DO -END DO -END SUBROUTINE - |