cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc subroutine transp(mytag, size, slices, f, tids, ntids) cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c This subroutine performs a 3-D matrix transposition on the c data array f. The call to transp() is made simultaneously by c the ntids processors which hold a "slice" of the array. c The processors do pairwise exchanges of the appropriate columns c of the array, followed by in-processor local transposition. c This version is written using PVM for message passing. c c Contact: Karin A. Remington, karin@cam.nist.gov ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc implicit none include 'fpvm3.h' integer*4 mytag, ntids, tids(ntids) integer*4 size, slices double precision f(size, size, slices) integer*4 tagnode, tagcol, tagsend, tagrecv, msgsize, vecleng integer*4 i, is1, is2, indcol, is, colid integer*4 add1, add2, add3, halfsize, dest logical black integer info msgsize = size*slices vecleng = size halfsize = ntids/2 if ( mytag < halfsize ) then add1 = 0 add2 = halfsize add3 = halfsize else add1 = halfsize add2 = -halfsize add3 = 0 end if if ( int( mytag/2 ) * 2 /= mytag ) then black = .false. else black = .true. endif if ( black ) then do 100 i = 2, halfsize dest = mod((mytag-1)+i,halfsize)+ 1 + add1 indcol = (dest-1)*slices+1 call pvmfinitsend(PVMDEFAULT, info) do 110 is = 1,slices call pvmfpack(REAL8, f(1,indcol,is), msgsize, 1, info) 110 continue call pvmfsend(tids(dest), mytag, info) call pvmfrecv(-1, dest-1, info) do 120 is = 1,slices call pvmfunpack(REAL8, f(1,indcol,is), msgsize, 1, info) 120 continue 100 continue do 130 i = 1, halfsize dest = mod((mytag-1)+i,halfsize)+ 1 + add3 indcol = (dest-1)*slices+1 call pvmfinitsend(PVMDEFAULT, info) do 140 is = 1,slices call pvmfpack(REAL8, f(1,indcol,is), msgsize, 1, info) 140 continue call pvmfsend(tids(dest), mytag, info) call pvmfrecv(-1, dest-1, info) do 150 is = 1,slices call pvmfunpack(REAL8, f(1,indcol,is), msgsize, 1, info) 150 continue 130 continue else do 200 i = 2, halfsize dest = mod((mytag+1)-i+halfsize,halfsize)+ 1 + add1 indcol = (dest-1)*slices+1 call pvmfinitsend(PVMDEFAULT, info) do 210 is = 1,slices call pvmfpack(REAL8, f(1,indcol,is), msgsize, 1, info) 210 continue call pvmfsend(tids(dest), mytag, info) call pvmfrecv(-1, dest-1, info) do 220 is = 1,slices call pvmfunpack(REAL8, f(1,indcol,is), msgsize, 1, info) 220 continue 200 continue do 230 i = 1, halfsize dest = mod((mytag+1)-i+halfsize,halfsize)+ 1 + add3 indcol = (dest-1)*slices+1 call pvmfinitsend(PVMDEFAULT, info) do 240 is = 1,slices call pvmfpack(REAL8, f(1,indcol,is), msgsize, 1, info) 240 continue call pvmfsend(tids(dest), mytag, info) call pvmfrecv(-1, dest-1, info) do 250 is = 1,slices call pvmfunpack(REAL8, f(1,indcol,is), msgsize, 1, info) 250 continue 230 continue endif ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c Local (in processor) data transpose: ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc do 300 i = 0,ntids-1 colid = i*slices+1 do 310 is2 = 1, slices-1 do 320 is1 = is2+1, slices call dswap(vecleng,f(1,colid-1+is1,is2),1, * f(1,colid-1+is2,is1),1) 320 continue 310 continue 300 continue return end ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c End subroutine transp ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc subroutine dswap (n,dx,incx,dy,incy) c c interchanges two vectors. c uses unrolled loops for increments equal one. c jack dongarra, linpack, 3/11/78. c double precision dx(1),dy(1),dtemp integer i,incx,incy,ix,iy,m,mp1,n c if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments not equal c to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dtemp = dx(ix) dx(ix) = dy(iy) dy(iy) = dtemp ix = ix + incx iy = iy + incy 10 continue return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,3) if( m .eq. 0 ) go to 40 do 30 i = 1,m dtemp = dx(i) dx(i) = dy(i) dy(i) = dtemp 30 continue if( n .lt. 3 ) return 40 mp1 = m + 1 do 50 i = mp1,n,3 dtemp = dx(i) dx(i) = dy(i) dy(i) = dtemp dtemp = dx(i + 1) dx(i + 1) = dy(i + 1) dy(i + 1) = dtemp dtemp = dx(i + 2) dx(i + 2) = dy(i + 2) dy(i + 2) = dtemp 50 continue return end