program fft_with_HPF parameter(indx = 15) parameter(nx = 2**indx,maxiters=1000) integer comm,myid,nnodes,ierr complex,allocatable,dimension(:) :: f,g,sct,a,b,t integer,allocatable,dimension(:) :: mixup double precision time_begin,time_end double precision elapsed_time ! HPF$ processors proc(number_of_processors()) !HPF$ distribute (block) :: f,g,sct,a,b,t,mixup interface subroutine bitrv1(mixup,isign,indx,kxp,kblok,sct) integer,intent(inout) :: mixup(:) complex,intent(inout) :: sct(:) !HPF$ distribute *(block) :: mixup,sct end subroutine subroutine permute(f,g,mixup,indx,kxp,kblok) integer,intent(inout) :: mixup(:) complex,intent(inout) :: f(:),g(:) !HPF$ distribute *(block) :: mixup,f,g end subroutine subroutine fft(f,g,isign,mixup,sct,indx,kxp,kblok) integer,intent(inout) :: mixup(:) complex,intent(inout) :: sct(:),f(:),g(:) !HPF$ distribute *(block) :: mixup,f,g,sct end subroutine end interface kblok = number_of_processors() kxp = nx/kblok isign = 1 allocate(f(kxp*kblok)) allocate(g(kxp*kblok)) allocate(sct(kxp*kblok)) allocate(mixup(kxp*kblok)) forall(i=1:nx) g(i) = cmplx(99.0,-9.0) endforall !start timer call timer(time_begin) print *,elapsed_time() call bitrv1(mixup,isign,indx,kxp,kblok,sct) !do iter =1,maxiters call permute(f,g,mixup,indx,kxp,kblok) call fft(f,g,isign,mixup,sct,indx,kxp,kblok) !enddo !stop timer print *,'after fft',elapsed_time() call timer(time_end) print *,'fft of 1-d with size= ',nx,' took ' print *,time_end-time_begin,' seconds for ',maxiters,' iterations' end subroutine timer(t) integer val(8) double precision t call date_and_time(values=val) t = dble(val(8))*1d-3 + dble(val(7)) + & dble(val(6))*60d0 + dble(val(5))*3600d0 end subroutine timer double precision function elapsed_time() integer :: clock_count, clock_rate, clock_max integer, save :: last_count logical, save :: system_clock_not_yet_called = .true. call system_clock(count=clock_count,& count_rate=clock_rate, & count_max=clock_max) if (system_clock_not_yet_called) then system_clock_not_yet_called = .false. elapsed_time = 0.0 else if (clock_count > last_count) then elapsed_time = real (clock_count-last_count)/real(clock_rate) else elapsed_time = real (clock_max-last_count+clock_count)& /real(clock_rate) end if last_count = clock_count return end function elapsed_time !kblok is the number of processors !g is the input vector whose fft will be taken !kxp is the length of local data subroutine fft(f,g,isign,mixup,sct,indx,kxp,kblok) integer,intent(inout) :: mixup(:) complex,intent(inout) :: sct(:),f(:),g(:) !HPF$ distribute *(block) :: mixup,f,g,sct integer lb,ub,sign,info,iarg complex t1,tcmlx complex global_g(kxp,kblok) nx = 2**indx nxh = nx/2 indx1 = indx-1 kbs = nxh/kxp dnx = 6.28318530717959/float(nx) sign = isign kxp1 = nx/kblok !C --------------------------- !C inverse fourier transform !C bit-reverse array elements to temporary !C --------------------------- do l = 1, indx1 nxs = 2**(l - 1) kxs = nxs/kxp !C --------------------------- !C local calculation !C --------------------------- if (kxs.eq.0) then !HPF$ INDEPENDENT,new(j,lb) do j = 1,nx lb = (j - 1)/nxs if (btest(lb,0) .eqv. .FALSE.) then f(j) = g(j+nxs) else f(j) = g(j-nxs) endif enddo !C --------------------------- !C perform reduction !C --------------------------- km = kxp/nxs !HPF$ INDEPENDENT,new(j,kb,lb,t1,iarg) do j = 1, nx kb = j - 1 lb = kb/nxs iarg = 1+km*(kb-nxs*lb) if (sign .eq. -1) then t1 = sct(iarg+ j) else if (sign .eq.1) then tcmlx = sct(iarg+j) t1 = conjg(tcmlx ) endif if (btest(lb,0) .eqv. .FALSE.) then g(j) = g(j) + t1*f(j) else g(j) = f(j) - t1*g(j) endif enddo else !C --------------------------- !C copy data !C --------------------------- kb = 1 - 1 lb = kb/kxs kb = kb + 1 !HPF$ INDEPENDENT,new(k,kb,lb,j) do j = 1, nx if (btest(lb,0) .eqv. .FALSE.) then f(j) = g(j+(kb+kxs)*kxp) else f(j) = g(j+(kb-kxs)*kxp) endif enddo !C ------------------ !C perform reduction !C ------------------ km = nxh/nxs dns = dnx*float(km) kb = 1-1 lb = kb/kxs kb = kxp*(kb - kxs*lb) - 1 !HPF$ INDEPENDENT ,new(j,arg,t1) do j = 1, nx arg = dns*float(j + kb) t1 = cmplx(cos(arg),sign*sin(arg)) if (btest(lb,0) .eqv. .FALSE.) then g(j) = g(j) + t1*f(j) else g(j) = f(j) - t1*g(j) endif enddo endif enddo return end subroutine subroutine bitrv1(mixup,isign,indx,kxp,kblok,sct) integer,intent(inout) :: mixup(:) complex,intent(inout) :: sct(:) !HPF$ distribute *(block) :: mixup,sct integer isign,indx,kxp,kblok integer llb,ub real arg,dnx nx = 2**indx nxh = nx/2 dnx = 6.28318530717959/float(nx) !C --------------------------------- !C prepare bit-reverse index table !C --------------------------------- !HPF$ INDEPENDENT,new(i,j,lb,ll,l,jb,it) do i=1,nx lb = j -1 ll = 0 do l = 1, indx jb = lb/2 it = lb - 2*jb lb = jb ll = 2*ll + it enddo mixup(i) = ll + 1 enddo !HPF$ INDEPENDENT,new(j,arg) do j = 1,nx arg = dnx*float((nxh*(j - 1))/kxp) sct(j) = cmplx(cos(arg),-sin(arg)) enddo return end subroutine permute(f,g,mixup,indx,kxp,kblok) integer,intent(inout) :: mixup(:) complex,intent(inout) :: f(:),g(:) !HPF$ distribute *(block) :: mixup,f,g !C --------------------------- !C inverse fourier transform !C bit-reverse array elements to temporary !C --------------------------- !HPF$ INDEPENDENT,new(i,ll) do i=1,nx ll = mixup(j+k*kxp) g(i) = f(ll) enddo return end