***************************************************************************** * Laplace Equation Numerically Solved by Jacobi Iteration Method on NPACI T3E * Temperature T is initially 25 everywhere except at boundaries. * Revision for Correcting Errors and Stopping for Convergence. * F98+ Correcting Errors Found by QL in Class * F98+ Using Exact Test Case T(x,y)=100*x*y for Txx+Tyy=0 on LX by LY Rect. * Fall00: Correction for New MPI from MPT (Message Passsing ToolKit), NPACI * Fall00: Thanks to NPACI Consultant Kent MilFeld for MPT Changes Fix. * * Physical Problem Domain Decomposed Array Problem * T=100x 0X,J * |y | | |* | | | | | * | | | I| | | | | | * T=0 |T=Tinit| T=100Y 0x LY NR+1|____|____|____|____| v Y,I** * 0.0 LX 0 J NC+1 * T=0 0.0 LX/4 LX/2 3LX/4 LX * * Artificial, Interior Boundary OverLap, Using N = NC4, #XCols, Local * and p = NPROC, #NumberVirtualProcessors * 0 1 ... N N+1 0 1 ... N N+1 0 1 ... N N+1 * VPE0 | - - - - - | VPE2 | - - - - - | .... | - - - - - | * | - - - - - | VPE1 | - - - - - | VPE3 | - .... - | VPE[q-1] * 0 1 ... N N+1 0 1 ... N N+1 0 1 ... N N+1 * * 0 1 N N 2 2 3 3 4 4 q q (global * + * * * * * * * * node * 1 N N N N N N N N numbering) * + + + + (q=p-1) * 1 1 1 1 * Thus, RightBoundaryLeftPanel(N+1) Is FirstPointInsideRightPanel(1). * Use Central Differencing Method with 4 PEs. * Rectangular Domain is decomposed into 4 vertical panels for Col-wise Fortran. * Each process gets its own copy of the code to execute, * with parallel synchronization handled by MPI in mpprun environment. * Each process only has subgrid for VPE=0, 1, 2, ..., npes-1 (=3 here), * where VPE=VirtualOrUserProcessingElement. * Each Processor works on sub grid and then sends its Boundaries to neighbours * ** In array formulation, Y,I axis is Reversed Since T(0,0) Element at TopLeft * Note: Array T(i,j) is Only Local to Each PE and its Panel! * *98: FH/MCS572F98 interactive run suggestions: *98 compile and load: f90 -O2 -r3 -Xm -o [executable] [source].f & *98 execute: mpprun -n4 [executable]<[data]>[source].output& *98 Input `[data]' contains maximum number of interations: niter *97 Try ps Process Status Command While Running & Should See 4 Jobs Executing! *97 Try jobs Jobs Status Command to Check for Running, Exit or Done. *F98 Assistance Qiming Lian and group .... Dec 1998 * * Susheel Chitre PSC.... Feb 1994: original source *********************************************************** program laplace implicit none * MPI Step: Include MPI Fortran Header Definitions and Declarations include 'mpif.h' * New Definitions Correct Coarse Mesh Error In Original integer, parameter :: NPROC = 4 !NPROC=#procs; integer, parameter :: NYI = 201 !NYI=#YSubInts, global; integer, parameter :: NXI = 201 ! NXI=#XSubInts, global; integer, parameter :: NR = NYI-1 ! NR=#InteriorRows, local; integer, parameter :: NC = NXI-1 ! NC=#InteriorCols, local; integer, parameter :: NC4 = NC/NPROC ! NC4=#InteriorCols, Local; integer, parameter :: MXITER = 99999 ! MXITER=LimitMaxIterations: * Caution: Count Subintervals Rather Than Interior(Rows,Cols) which are * not properly divisible while Subintervals are (ERROR CORRECTED). * Let (LY,LX)=(Y,X)Lengths;(HY,HX)=(DY,DX)=(Y,X)StepSize: Corrected Original real*8, parameter :: LY = 1.e0 real*8, parameter :: LX = 1.e0 real*8, parameter :: HY = LY/NYI real*8, parameter :: HX = LX/NXI * Let NR2=#Rows/2;NC0=StartCol;NCE=EndCol;DNC=ColStep for Sample Print integer, parameter :: NR2 = NYI/2 integer, parameter :: NC0 = NC4/25 integer, parameter :: NCE = NC4-NC0+1 integer, parameter :: NCL2 = NC4/2 integer, parameter :: DNC = NCL2-NC0 integer, parameter :: NRO0 = 0 integer, parameter :: NROE = NYI integer, parameter :: DNRO = NYI/20 integer, parameter :: NCO0 = 0 integer, parameter :: NCOE = NC4 integer, parameter :: DNCO = NC4/5 * Let stoptol=Stopping Tolerance for dtg=GlobalMaxAbsChange real*8, parameter :: stoptol = 0.5e-2 ! 2 digits rounded * Let Message Tag Labels Be integer, parameter :: LEFT = 100 ! LEFT = LeftMsgTag; integer, parameter :: RIGHT = 101 ! RIGHT = RightMsgTag; integer, parameter :: ROOT = 0 ! ROOT = MasterVPE#. * Timer: Declare MPI Wall Timer Variables (double precision is not supported): real*8 starttime,stoptime,timerres,totalwalltime * Let T(*,*)=LocalPanelTemp;Told(*,*)=OldLocalPanelTemp;dt=LocalMaxAbsChange; * and dtg=GlobalMaxAbsChange;Tinit=InitialTemp;Tex=ExactTemp; * and dtexv=LocalAbsErrorVector;dtexg=GlobalMaxAbsError: real*8, dimension (0:NYI,0:NC4+1) :: T, Told real*8, dimension (0:NYI,0:NC4+1) :: Tex, dtexv real*8 Terr, dtex, dtexg, reldtexg, dt, dtg real*8, parameter :: Tinit=25.0 !F98+1change * Let i=Row(y)Index;j=Col(x)Index;iter=CurrentIteration;niter=#Iterations; * and mype=LocalVPE#;npes=#PEs;ierr=MPIReturnOrErrorCode;nstop=StopIter: integer i, j, iter, niter, mype, npes, ierr, nstop * MPI Step: Declare status, given included MPI_STATUS_SIZE value: integer status(MPI_STATUS_SIZE) * MPI Step: Initialize MPI, giving each PE copy of code;MPI required subroutine; * using programming model:SPMD=SglPgmMultiData. Call MPI_Init(ierr) * MPI Step: Start MPI Wall Timer and get Tick Resolution: starttime = MPI_Wtime() timerres = MPI_Wtick() * MPI Step: Get T3E assigned npes=#PEs or Communication_size: Call MPI_Comm_size(MPI_COMM_WORLD, npes, ierr) * where MPI_COMM_WORLD=MPI_Communication_World=UserSetOfPEs. CAUTION: npes MustEqual NPROC, So if Change npes, Then Change NPROC. * MPI Step: Get local (this PE) mype=PE#, also called Communication_rank: Call MPI_Comm_rank(MPI_COMM_WORLD, mype, ierr) * Laplace: Set Local Initial Conditions & Exact Test for Laplace Equation: Cut F98: Call initialize( T ) Do i=0, NYI Do j=0, NC4+1 T(i,j)=Tinit Tex(i,j)=100.0*i*HY*(j+mype*NC4)*HX Enddo Enddo C ---------------------- * Laplace: Set Local Boundary Conditions for Laplace Equation: Cut F98: Call set_bcs( T, mype, npes ) * * Geometric Left And Array Left: * If( mype.eq.0 ) then Do i=1, NYI T(i,0) = 0.0 Enddo Endif * * Geometric Right And Array Right: * If( mype.eq.npes-1 ) then Do i=1,NYI T(i,NC4+1) = 100.0*i*HY Enddo Endif * * Set Bottom and Top Boundaries: * Do j=0,NC4+1 * Geometric Bottom, But Array Top: T(0,j) = 0.0 * Geometric Top, But Array Bottom: T(NYI,j) = 100.0*(j+mype*NC4)*HX Enddo C ---------------------------------------- * Initialize Iteration Stopping Parameter nstop = 0 * Laplace: Root Reads Number of Iterations for this Job from "data" Input File: If( mype .eq. ROOT ) then read*, niter ! suggest niter = 20000 if stoptol = 0.5e-4 If( niter.gt.MXITER ) niter = MXITER * Correct Local Interior Point Count so Sum = NC = #TotalInteriorPoints Endif * Laplace: Save Old Iteration: Do i=0, NR+1 Do j=0, NC4+1 Told(i,j) = T(i,j) Enddo Enddo * MPI Step: All VPES Get Collective Communication of niter from Root: Call MPI_Bcast(niter, 1, MPI_INTEGER, ROOT, MPI_COMM_WORLD, ierr) * where count is 1 and datatype is MPI_INTEGER. * * Do Computation on Local Sub-grid for niter 5-Star Jacobi Iterations: * Corrected for Arbitrary (LY,LX)Lengths. * Do 110 iter=1,niter Do j=1,NC4 Do i=1,NR T(i,j) = 0.5 * ((Told(i+1,j)+Told(i-1,j))*HX**2 $ + (Told(i,j+1)+Told(i,j-1))*HY**2) $ /(HX**2+HY**2) Enddo Enddo * * Get dt=LocalMaxAbsChange for this PE and Save Old Temperature: * dt = 0 dtex = 0 Do j=1,NC4 Do i=1,NR dt = max( abs(T(i,j) - Told(i,j)), dt ) dtexv(i,j)= abs(T(i,j) - Tex(i,j)) dtex = max( abs(T(i,j) - Tex(i,j)), dtex ) Told(i,j) = T(i,j) Enddo Enddo * * MPI Step: mype Sends Right Boundary Data to Right Neighbor mype+1, * using Point_to_Point Send Communication for mype=0,...,npes-2: If (mype .lt. npes-1) & Call MPI_Send(T(1,NC4), NR, MPI_DOUBLE_PRECISION, mype+1, & RIGHT, MPI_COMM_WORLD, ierr) * starting at address T(1,NC4) for NR rows using datatype MPI_DOUBLE_PRECISION. * * MPI Step: mype Receives its Left Boundary Data from Left Neighbor mype-1 * (here using wildcard name MPI_ANY_SOURCE; note MsgTag=RIGHT), * using Point_to_Point Receive Communication for mype=1,...,npes-1: If (mype .ne. 0) & Call MPI_Recv(T(1,0), NR, MPI_DOUBLE_PRECISION, & MPI_ANY_SOURCE, RIGHT, MPI_COMM_WORLD, status, ierr) * starting at address T(1,0) for NR rows with datatype MPI_DOUBLE_PRECISION, * with Additional Vector Structure status Argument. * * MPI Step: mype Sends Left Boundary Data to Left Neighbor mype-1, * using Point_to_Point Send Communication for mype=1,...,npes-1: If (mype .ne. 0) & Call MPI_Send(T(1,1 ), NR, MPI_DOUBLE_PRECISION, mype-1, & LEFT, MPI_COMM_WORLD, ierr) * starting at address T(1,1) for NR rows with datatype MPI_DOUBLE_PRECISION. * * MPI Step: mype Receives its Right Boundary Data from its Right Neighbor mype+1 * (here using wildcard name MPI_ANY_SOURCE; note MsgTag=LEFT), * using Point_to_Point Receive Communication for mype=1,...,npes-1: * If (mype .ne. npes-1) & Call MPI_Recv(T(1,NC4+1), NR, MPI_DOUBLE_PRECISION, & MPI_ANY_SOURCE, LEFT, MPI_COMM_WORLD, status, ierr) * starting at address T(1,NC4+1) for NR rows with datatype MPI_DOUBLE_PRECISION, * with Additional Vector Structure status Argument. * * CAUTION: With New MPT toolkit, must interlace Sends and Recvs, * ELSE Processor Deadlock can happen. * * MPI Step: mype Sends to RootPE LocalMaxAbsChange=dt using Collective (Global) * Reduction Maximum Function MPI_MAX with Result Eventually Collected in * GlobalMaxAbsChange=dtg of count 1 with datatype MPI_DOUBLE_PRECISION: * and GlobalMaxAbsError=dtexg of count 1 with datatype MPI_DOUBLE_PRECISION: Call MPI_Reduce(dt, dtg, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & ROOT, MPI_COMM_WORLD, ierr) Call MPI_Reduce(dtex, dtexg, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & ROOT, MPI_COMM_WORLD, ierr) Change F98: Save Told for Next Iteration: Do j=0, NC4+1 Do i=0, NR+1 Told(i,j) = T(i,j) Enddo Enddo C ----------------------------- * * Root Print Selected Values at Every 100th Iteration: * Important that Writes are in Serial Sections by Root on MPP T3E: * Root Also Check Stopping Criteria Tolerance If( mype.eq.0 ) then If( mod(iter,niter/10).eq.0 ) then write(6,1) mype, iter, dtg, dtexg * 97: VPE=0 Writes Out a Sample Central Value: CUT98: write(6,2) mype, iter, NR2, NC0, NC4,DNC, CUT: & T(NR2,NC0),T(NR2,NCL2),T(NR2,NCE) Endif If(dtg.lt.stoptol) then nstop = 1 Endif Endif * Global Call Broadcast of Possible Stopping Value By All. Call MPI_Bcast(nstop, 1, MPI_INTEGER, ROOT, MPI_COMM_WORLD, ierr) 1 Format(' MYPE =',I4,'; ITER =',i5/ & ,' GlobalMaxAbsChange:dtg = ',e10.3 & ,'; GlobalMaxAbsError:dtexg = ',e10.3) 2 Format(' MYPE =',I4,'; ITER =',i5 & ,'; T(',i3,',',i3,':',i3,':',i3,') =',3f9.4) * MPI Step: Barrier is Final Call for all PEs to Stop and Wait for Rest * in MPI_COMM_WORLD to Finish Before Continuing on to Next Iteration: Call MPI_Barrier( MPI_COMM_WORLD, ierr ) * No longer Call Generic T3E barrier too as in olded code. If(nstop.eq.1) exit 110 CONTINUE * Stopping Criterion Stop: If( mype.eq.0 ) then * MPI Step: Stop MPI Wall Timer: stoptime = MPI_Wtime() totalwalltime = stoptime - starttime reldtexg = dtexg/400.0 write(6,3) niter,stoptol,dtg,dtexg,reldtexg,mype, & totalwalltime,timerres If(dtg>stoptol) then write(6,33) niter Endif Endif 3 Format(' niter = ',i8,'; Stoptol= ',e10.3, & ' ; GlobalMaxAbsChange:dtg = ',e10.3 & /' GlobalMaxAbsError:dtexg = ',e10.3, & '; RelMaxAbsError:dtexg/400 = ',e10.3 & /' MYPE =',i4,'; TotalWallTime =',e12.4 & ,' secs; WallTick =',e12.4,' secs;') * Print Final Sample Output Temperatures for Each mype's Panel: write(6,4) mype, iter, dt, dtex, (j,j=0,NC4,DNCO),NC4+1 4 Format(' Sample Output: MYPE =',I4,'; ITER =',i5, & '; dt =',e12.4, '; dtex =',e12.4 & /' I/J ',10i7) 33 Format(' More Than ', i8,' Iterations Needed') Do i = NROE, NRO0, -DNRO write(6,5) i,(T(i,j),j=0,NC4,DNCO),T(i,NC4+1) CTemp: write(6,5) i,(dtexv(i,j),j=0,NC4,DNCO),dtexv(i,NC4+1) Enddo write(6,5) NYI,(dtexv(NYI,j),j=0,NC4,DNCO),dtexv(NYI,NC4+1) CTEMP: write(6,5) 0,(dtexv(0,j),j=0,NC4,DNCO),dtexv(0,NC4+1) 5 Format(i4,1x,10f7.2) * MPI Step: MPI Required Finalization Step: Call MPI_Finalize(ierr) * * End of Main Program Compilation! * END