strassen.f90 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. ! { dg-options "-O2" }
  2. ! { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} }
  3. program strassen_matmul
  4. use omp_lib
  5. integer, parameter :: N = 1024
  6. double precision, save :: A(N,N), B(N,N), C(N,N), D(N,N)
  7. double precision :: start, end
  8. call random_seed
  9. call random_number (A)
  10. call random_number (B)
  11. start = omp_get_wtime ()
  12. C = matmul (A, B)
  13. end = omp_get_wtime ()
  14. write(*,'(a, f10.6)') ' Time for matmul = ', end - start
  15. D = 0
  16. start = omp_get_wtime ()
  17. call strassen (A, B, D, N)
  18. end = omp_get_wtime ()
  19. write(*,'(a, f10.6)') ' Time for Strassen = ', end - start
  20. if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort
  21. D = 0
  22. start = omp_get_wtime ()
  23. !$omp parallel
  24. !$omp single
  25. call strassen (A, B, D, N)
  26. !$omp end single nowait
  27. !$omp end parallel
  28. end = omp_get_wtime ()
  29. write(*,'(a, f10.6)') ' Time for Strassen MP = ', end - start
  30. if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort
  31. contains
  32. recursive subroutine strassen (A, B, C, N)
  33. integer, intent(in) :: N
  34. double precision, intent(in) :: A(N,N), B(N,N)
  35. double precision, intent(out) :: C(N,N)
  36. double precision :: T(N/2,N/2,7)
  37. integer :: K, L
  38. if (iand (N,1) .ne. 0 .or. N < 64) then
  39. C = matmul (A, B)
  40. return
  41. end if
  42. K = N / 2
  43. L = N / 2 + 1
  44. !$omp task shared (A, B, T)
  45. call strassen (A(:K,:K) + A(L:,L:), B(:K,:K) + B(L:,L:), T(:,:,1), K)
  46. !$omp end task
  47. !$omp task shared (A, B, T)
  48. call strassen (A(L:,:K) + A(L:,L:), B(:K,:K), T(:,:,2), K)
  49. !$omp end task
  50. !$omp task shared (A, B, T)
  51. call strassen (A(:K,:K), B(:K,L:) - B(L:,L:), T(:,:,3), K)
  52. !$omp end task
  53. !$omp task shared (A, B, T)
  54. call strassen (A(L:,L:), B(L:,:K) - B(:K,:K), T(:,:,4), K)
  55. !$omp end task
  56. !$omp task shared (A, B, T)
  57. call strassen (A(:K,:K) + A(:K,L:), B(L:,L:), T(:,:,5), K)
  58. !$omp end task
  59. !$omp task shared (A, B, T)
  60. call strassen (A(L:,:K) - A(:K,:K), B(:K,:K) + B(:K,L:), T(:,:,6), K)
  61. !$omp end task
  62. !$omp task shared (A, B, T)
  63. call strassen (A(:K,L:) - A(L:,L:), B(L:,:K) + B(L:,L:), T(:,:,7), K)
  64. !$omp end task
  65. !$omp taskwait
  66. C(:K,:K) = T(:,:,1) + T(:,:,4) - T(:,:,5) + T(:,:,7)
  67. C(L:,:K) = T(:,:,2) + T(:,:,4)
  68. C(:K,L:) = T(:,:,3) + T(:,:,5)
  69. C(L:,L:) = T(:,:,1) - T(:,:,2) + T(:,:,3) + T(:,:,6)
  70. end subroutine strassen
  71. end