cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
c  This is the 3D fast multipole method (FMM) code for the Yukawa 
c  potential (also called modified Helmholtz, screened Coulomb, 
c  linearized Poisson-Boltzman, etc.). It is based on the new version 
c  of FMM first introduced by Greengard and Rokhlin for the Laplace
c  equation in 1997 (see Ref. 2 below). It computes the screened 
c  Coulombic interaction between n particles.
c
c  Copyright (C) Jingfang Huang 
c
c  This program is free software; you can redistribute it and/or
c  modify it under the terms of the GNU General Public License
c  as published by the Free Software Foundation; either version 2
c  of the License, or (at your option) any later version.
c
c  This program is distributed in the hope that it will be useful,
c  but WITHOUT ANY WARRANTY; without even the implied warranty of
c  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c  GNU General Public License for more details.
c
c  License details are available in license.txt, you may also write to 
c
c    The Free Software Foundation, Inc., 
c    51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
c
c  Reference:
c    1. Greengard, Leslie F.; Huang, Jingfang: A new version of the 
c       fast multipole method for screened Coulomb interactions in 
c       three dimensions. J. Comput. Phys. 180 (2002), no.2, 642--658. 
c 
c    2. Greengard, Leslie; Rokhlin, Vladimir: A new version of the 
c       fast multipole method for the Laplace equation in three dimensions.  
c       Acta numerica, 1997,  229--269, Acta Numer., 6, Cambridge Univ. 
c       Press, Cambridge, 1997. 
c
c  For suggestions, comments, and bug reports, please contact 
c
c    Jingfang Huang
c    CB# 3250, Phillips Hall
c    Department of Mathematics, UNC
c    Chapel Hill, NC 27599-3250, USA.
c    Email: huang@amath.unc.edu
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
      SUBROUTINE FMMYUK_UNI(BETA,NATOMS,ZAT,CHARGE,POT,FIELD,
     1  NLEV,IER)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
c  purpose:
c    this is the main calling subroutine. the purpose of this
c    subroutine is to provide an interface between other codes
c    and the fmm subroutines in this package.
c
c  on input:
c    beta: the frequency of the equation.
c    natoms: total number of atoms.
c    zat(3,natoms): particle locations.
c    charge(natoms) : charge each particle carries.
c    nlev: number of levels in the octree structure.
c
c  on output:
c    pot(natoms): potential at each particle location.
c    field(3,natoms): the force field at each particle location.
c    ier: error message.
c
c  other parameters:
c    a few parameters are specified in the file parm-uniyuk.h,
c    including:
c      iflag: for boundary condition, currently only free space
c             boundary condition is implemented. periodic will be
c             added later.
c      nterms: the number of terms in the multipole and local expansion.
c      nlambs: the number of terms in the exponential expansion.
c
c    the default is set to nterms=nlambs=9 for 3 digits accuracy.
c
c  memory allocation.
c    the memory is divided to two parts, fixed size part and allocated
c    part.
c
c    for fixed size part, check parm-uniyuk.h.
c    memory allocation is done by calculating the total memory required
c      for integer, real and complex variables, and three big vectors are
c      allocated.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
      IMPLICIT NONE
c
c-----include parm-uniyuk.h for additional parameters and fixed lengh
c       stuff.
c
      INCLUDE "parm-uniyuk.h"
c
      INTEGER *4 NATOMS,NLEV,IER
      REAL *8 BETA
      REAL *8 ZAT(3,NATOMS),CHARGE(NATOMS)
      REAL *8 POT(NATOMS),FIELD(3,NATOMS)
c
c-----local variables important for memory allocation.
c
      INTEGER *4 NEXPTOT,NEXPTOTP,NEXPMAX,NTHMAX,NBOXES
      INTEGER *4 IIOFFST,IIATADR,IIBOX,IICNT,IICNT2
      INTEGER *4 ICENTER,IZAT2,ICHARG2,IZS
      INTEGER *4 IXS,IYS,IMEXPF1,IMEXPF2,IMEXPP1,IMEXPP2,IMEXPPALL,
     1           IFEXPE,IFEXPO,IFEXPBACK,IMPOLE,ILOCAL,ILEXP1,ILEXP2
      INTEGER *4 IPTR,LASTI,LASTR,LASTC
      INTEGER *4, ALLOCATABLE :: IXFMM(:)
      REAL *8, ALLOCATABLE :: XRFMM(:)
      COMPLEX *16, ALLOCATABLE :: XCFMM(:)
c
c-----more local variables.
c
      INTEGER *4 I,INDD,MMAX,JJ
      REAL *8 TIME0,TIME1,TEST1,TEST2,TOTMEM,RMEM
c
c-----function called
c
      REAL *8 SECOND
c
c-----0. estimate the memory used. it contains the following parts.
c       (a)input/output. (b) fixed fmm memory. (c) allocated fmm memory.
c       the first two parts will be calculated first. and (c) will be
c       added later.
c
      TOTMEM=DBLE(64*NATOMS)/1024D0/1024D0
      CALL PRIN2('fmm input/output memory (mb) *',TOTMEM,1)
      RMEM=DBLE(4*(170+NLAMBS*2)+8*((4*NTERMS+1)**2+2*NLAMBS+
     1  3*(2*NTERMS+1)*(2*NTERMS+1)*(2*NTERMS+1)+3600*4+
     2  NLAMBS*(1+NTERMS)*(1+NTERMS))+16*((NTERMS+1)**2)
     3  *3)/1024D0/1024D0
      TOTMEM=TOTMEM+RMEM
      CALL PRIN2('fmm fixed memory (mb) *',RMEM,1)
c
c-----1. generate precomputed matrices.
c        this part should be optimized using quadrature code later.
c
      CALL YHFRMINI(NTERMS,YTOP)
      CALL YHROTGEN(NTERMS,CARRAY,RDPLUS,RDMINUS,RDSQ3,RDMSQ3)
      CALL VWTS(RLAMS,WHTS,NLAMBS)
      CALL NUMTHETAHALF(NUMFOUR,NLAMBS)
      CALL NUMTHETAFOUR(NUMPHYS,NLAMBS)
c
c-----2. the number of exponential terms used in the plane wave expansion.
c
      DO I=1, NLAMBS
        TEST1= RLAMS(I)
        TEST2= DSQRT(TEST1*TEST1+2.0D0*TEST1*BETA)
        INDD=I
        MMAX=NUMPHYS(I)
        DO JJ=I, NLAMBS
          IF (TEST2 .LE. RLAMS(JJ) ) THEN
            INDD=JJ
            GOTO 1001
          ELSE
            MMAX=MAX(NUMPHYS(JJ), MMAX)
          ENDIF
        ENDDO
1001    NUMPHYS(I)=MAX(MMAX, NUMPHYS(INDD))
      ENDDO
c
      NEXPTOT = 0
      NTHMAX = 0
      NEXPTOTP = 0
      DO I = 1,NLAMBS
         NEXPTOT = NEXPTOT + NUMFOUR(I)
         IF (NUMFOUR(I).GT.NTHMAX) NTHMAX = NUMFOUR(I)
         NEXPTOTP = NEXPTOTP + NUMPHYS(I)
      ENDDO
      NEXPTOTP = NEXPTOTP/2
c
      NEXPMAX=MAX(NEXPTOT,NEXPTOTP)+1
c
c-----3. the total number of boxes.
c
      NBOXES=(8**NLEV+7)/7+5
c
c-----4. now make pointers to fmm memory blocks.
c
      IPTR=1
      CALL ADJ_MEM_PTR(IPTR,IIOFFST,NBOXES)
      CALL ADJ_MEM_PTR(IPTR,IIATADR,NATOMS)
      CALL ADJ_MEM_PTR(IPTR,IIBOX,NATOMS)
      CALL ADJ_MEM_PTR(IPTR,IICNT,NBOXES)
      CALL ADJ_MEM_PTR(IPTR,IICNT2,NBOXES)
      LASTI=IPTR
c
      IPTR=1
      CALL ADJ_MEM_PTR(IPTR,ICENTER,3*NBOXES)
      CALL ADJ_MEM_PTR(IPTR,IZAT2,3*NATOMS)
      CALL ADJ_MEM_PTR(IPTR,ICHARG2,NATOMS)
      CALL ADJ_MEM_PTR(IPTR,IZS,3*NEXPMAX)
      LASTR=IPTR
c
      IPTR=1
      CALL ADJ_MEM_PTR(IPTR,IXS,3*NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IYS,3*NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IMEXPF1,NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IMEXPF2,NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IMEXPP1,NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IMEXPP2,NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IMEXPPALL,16*NEXPMAX)
      CALL ADJ_MEM_PTR(IPTR,IFEXPE,15000)
      CALL ADJ_MEM_PTR(IPTR,IFEXPO,15000)
      CALL ADJ_MEM_PTR(IPTR,IFEXPBACK,15000)
      CALL ADJ_MEM_PTR(IPTR,IMPOLE,(NTERMS+1)*(NTERMS+1)*NBOXES)
      CALL ADJ_MEM_PTR(IPTR,ILOCAL,(NTERMS+1)*(NTERMS+1)*NBOXES)
      CALL ADJ_MEM_PTR(IPTR,ILEXP1,NEXPMAX*NBOXES)
      CALL ADJ_MEM_PTR(IPTR,ILEXP2,NEXPMAX*NBOXES)
      LASTC=IPTR
c
c-----allocate memory
c
      RMEM=DBLE(LASTI*4)/1024D0/1024D0+
     1     DBLE(LASTR*8D0+LASTC*16D0)/1024D0/1024D0
      TOTMEM=TOTMEM+RMEM
      CALL PRIN2('fmm memory allocated (mb) *',RMEM,1)
      CALL PRIN2('total memory (mb) *',TOTMEM,1)
c
      ALLOCATE(IXFMM(LASTI),XRFMM(LASTR),XCFMM(LASTC),STAT=IER)
c
      IF (IER .NE. 0) THEN
        CALL PRINF('memory allocation error in fmm, ier=*',IER,1)
        STOP
      ENDIF
c
c-----5. make the uniform tree.
c
      CALL PSBKIN(NLEV,LADDER,XRFMM(ICENTER))
c
c-----6. now the fast multipole calculation for far field.
c
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c      TIME0 = SECOND()
      CALL YUNIYUK(BETA,IFLAG,NLEV,ZAT,NATOMS,CHARGE,NTERMS,
     1  IXFMM(IIOFFST),IXFMM(IIATADR),LADDER,IXFMM(IIBOX),XRFMM(IZAT2),
     2  XRFMM(ICHARG2),XCFMM(IMPOLE),XCFMM(ILOCAL),XCFMM(ILEXP1),
     3  XCFMM(ILEXP2),XRFMM(ICENTER),IXFMM(IICNT),IXFMM(IICNT2),NBORS,
     4  POT,FIELD,DC,RDPLUS,RDMINUS,RDMSQ3,RDSQ3,WHTS,RLAMS,NLAMBS,
     5  NUMFOUR,NUMPHYS,NEXPTOT,NEXPTOTP,NTHMAX,XCFMM(IMEXPF1),
     6  XCFMM(IMEXPF2),XCFMM(IMEXPP1),XCFMM(IMEXPP2),XCFMM(IMEXPPALL),
     7  XCFMM(IXS),XCFMM(IYS),XRFMM(IZS),MW1,MW2,MW3,XCFMM(IFEXPE),
     8  XCFMM(IFEXPO),XCFMM(IFEXPBACK),RLSC,YTOP)
c
c      TIME1 = SECOND()
c      WRITE(11,554)TIME1-TIME0
  554   FORMAT('multipole calculation for far field',F8.2)
c 554   FORMAT(' time for expansion work is ',F8.2)
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c-----7. compute near neighbor interactions, the direct interaction part.
c
      TIME0 = SECOND()
      CALL YNBDIRECT(BETA,IFLAG,NLEV,ZAT,NATOMS,CHARGE,POT,FIELD,
     1  IXFMM(IIOFFST),IXFMM(IIATADR),LADDER,NBORS,XRFMM(ICENTER))
      TIME1 = SECOND()
      WRITE(11,555)TIME1-TIME0
555   FORMAT(' time for local work is ',F8.2)
c
      DEALLOCATE(IXFMM,XRFMM,XCFMM)
c
      RETURN
      END
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
      SUBROUTINE YUNIYUK(BETA,IFLAG,NLEV,ZAT,NATOMS,CHARGE,
     1  NTERMS,IOFFST,IATADR,LADDER,IBOX,
     2  ZAT2,CHARG2,MPOLE,LOCAL,LEXP1,LEXP2,CENTER,ICNT,ICNT2,
     3  NBORS,POT,FIELD,DC,RDPLUS,RDMINUS,RDMSQ3,RDSQ3,WHTS,RLAMS,
     4  NLAMBS,NUMFOUR,NUMPHYS,NEXPTOT,MNEXPTOTP,NTHMAX,MEXPF1,
     5  MEXPF2,MEXPP1,MEXPP2,MEXPPALL,XS,YS,ZS,MW1,MW2,MW3,
     6  FEXPE,FEXPO,FEXPBACK,RLSC,YTOP)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
c  purpose:
c
c    main subroutine of the new fmm for far field, based on multipole
c      and exponential expansions. two passes are executed. in the
c      first pass, multipole expansions for all boxes at all levels
c      are computed. in the second pass, interactions are computed at
c      successively finer levels.
c
c    this is the uniform code.
c
c  on input:
c
c    beta: frequency of the equation (e^(-beta*r)/r)
c    iflag: free space or periodic boundary condition.
c      currently only free space calculation is available.
c    nlev: number of levels in the uniform tree structure.
c    zat(3,natoms): locations of the particles,
c    natoms: total number of atoms.
c    charge(natoms): charge each particle carries.
c    nterms: number of terms in the multipole/local expansion.
c    nlambs: number of terms in the exponential expansion.
c    nexptot: total number of exponential expansion terms.
c    mnexptotp: max number of exponential expansions for the
c      worst case. note that only half the terms are used due to
c      symmetry in the fourier expansion. this parameter is used
c      to make sure enough memory is allocated for all cases.
c    nthmax: the max number of fourier terms in the exponential
c      expansion.
c
c  uniform tree structure:
c    ioffst:indicates where in the array iatadr the listing of
c      particles contained in box i begins.
c    iatadr: an ordered list of addresses of atoms.
c      the first group of addresses correspond to atoms which
c      lie in the first box, etc.
c    ladder: bookkeeping array where ladder(i) is number of boxes
c      in oct-tree of depth i.
c    ibox(i): address of box containing the ith atom
c    center: center of boxes in the uniform tree structure.
c    nbors: the neighboring boxes.
c
c  precomputed tables:
c    dc(0:nterms,0:nterms,0:nterms): precomputed array containing
c      coefficients for the local translation along the z axis.
c      this is precomputed by the subroutine lcshftcoef().
c    rdplus(0:nterms,0:nterms,-nterms:nterms): rotation matrix,
c      y to z, see subroutine rotgen<- fstrtn().
c    rdminus(0:nterms,0:nterms,-nterms:nterms): similar to rdplus,
c      z to x.
c    rdsq3(0:nterms,0:nterms,-nterms:nterms): similar to rdplus,
c      shifts the multipole and local expansions, +z direction.
c    rdmsq3(0:nterms,0:nterms,-nterms:nterms): similar to rdsq3,
c      -z direction.
c    whts(nlambs): weights for the plane wave (exponential) expansion.
c    rlams(nlambs): nodes for the plane wave expansion.
c    numfour(nlambs): number of fourier modes in the expansion.
c    numphys(nlambs): number of modes in the plane wave expansion.
c    ytop(0:60,0:60): factorial scaling factors. note that the size
c      is fixed because the vector is precomputed outside this code.
c
c  on output:
c    pot(natoms): potential at different particle locations.
c    field(natoms): force field at different particle locations.
c
c  variables
c    xat2,yat2,zat2,charg2(nbox): for generating multipole and
c      local expansions.
c    mpole(0:nterms+1,0:nterms+1,nboxes): multipole expansions for
c      all boxes.
c    local(0:nterms+1,0:nterms+1,nboxes): local expansions for
c      all boxes.
c    lexp1(mnexptotp,nboxes): exponential expansions in the
c      first direction.
c    lexp2(mnexptotp,nboxes): similar to lexp1, for the second direction.
c    icnt,icnt2: workspace array for assigning particles.
c    mexpf1(nexptot): used for exponential expansion, for mexpup.
c    mexpf2(nexptot): similar to mexpf1, for mexpdown.
c    mexpp1(mnexptotp): similar to mexpf1, for mexpuphys.
c    mexpp2(mnexptotp): similar to mexpf1, for mexpdnphys.
c    mexppall(mnexptotp,16): used for exponential expansions. the
c      expansions are first merged and then translated to different
c      locations.
c    xs(3,mnexptotp): stores the diagonal translation operators when
c      shifting the exponential expansion. this is complex *16.
c    ys(3,mnexptotp): similar to xs.
c    zs(3,mnexptotp): similar to xs, but this is real *8.
c    mw1(0:nterms,0:nterms): temporary working space for storing multipole
c      and local expansions.
c    mw2(0:nterms,0:nterms): similar to mw1.
c    mw3(0:nterms,0:nterms): similar to mw1, for "point and shoot" technique.
c    fexpe(?),fexpo(?),fexpback(?): used for merging exponential expansions.
c      note that the size of these vectors changes depending on beta and
c      accuracy requirements. therefore the memory must be allocated
c      correctly. the following is an estimate of the size.
c      size of fexpe, fexpo, fexpback =  4000 for nlambs = 20
c      size of fexpe, fexpo, fexpback =   400 for nlambs = 10
c    rlsc(0:nterms,0:nterms,nlambs): stores p_n^m for different lambda_k.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
      IMPLICIT NONE
c
      INTEGER *4 IFLAG,NLEV,NTERMS,NATOMS,NLAMBS,IOFFST(1),IATADR(1)
      INTEGER *4 NEXPTOT,MNEXPTOTP,NTHMAX
      INTEGER *4 LADDER(1),IBOX(1),NBORS(1)
      INTEGER *4 ICNT(1),ICNT2(1)
      INTEGER *4 NUMPHYS(1), NUMFOUR(1)
c
      REAL *8 ZAT(3,NATOMS),CHARGE(NATOMS)
      REAL *8 ZAT2(3,1),CHARG2(1)
      REAL *8 CENTER(3,1),POT(NATOMS)
      REAL *8 FIELD(3,NATOMS)
      REAL *8 BETA,RLAMS(1)
      REAL *8 YTOP(0:NTERMS,0:NTERMS)
      REAL *8 ZS(3,MNEXPTOTP)
      REAL *8 RLSC(0:NTERMS,0:NTERMS,NLAMBS)
      REAL *8 RDPLUS(0:NTERMS,0:NTERMS,-NTERMS:NTERMS)
      REAL *8 RDMINUS(0:NTERMS,0:NTERMS,-NTERMS:NTERMS)
      REAL *8 RDSQ3(0:NTERMS,0:NTERMS,-NTERMS:NTERMS)
      REAL *8 RDMSQ3(0:NTERMS,0:NTERMS,-NTERMS:NTERMS)
      REAL *8 DC(0:NTERMS,0:NTERMS,0:NTERMS)
      REAL *8 WHTS(NLAMBS)
c
      COMPLEX *16 MPOLE(0:NTERMS,0:NTERMS,*)
      COMPLEX *16 LOCAL(0:NTERMS,0:NTERMS,*)
      COMPLEX *16 LEXP1(MNEXPTOTP,*)
      COMPLEX *16 LEXP2(MNEXPTOTP,*)
      COMPLEX *16 MEXPF1(NEXPTOT)
      COMPLEX *16 MEXPF2(NEXPTOT)
      COMPLEX *16 MEXPP1(MNEXPTOTP)
      COMPLEX *16 MEXPP2(MNEXPTOTP)
      COMPLEX *16 MEXPPALL(MNEXPTOTP,16)
      COMPLEX *16 XS(3,MNEXPTOTP)
      COMPLEX *16 YS(3,MNEXPTOTP)
      COMPLEX *16 FEXPE(1)
      COMPLEX *16 FEXPO(1)
      COMPLEX *16 FEXPBACK(1)
      COMPLEX *16 MW1(0:NTERMS,0:NTERMS)
      COMPLEX *16 MW2(0:NTERMS,0:NTERMS)
      COMPLEX *16 MW3(0:NTERMS,0:NTERMS)
c
c-----local variables
c
      INTEGER *4 NALLBX,KAT,INFO,MYCHILD,IFL
      INTEGER *4 INDD,MMAX,JJ,MAX,NEXPTOTP
      INTEGER *4 ISTARTC,NBC,MBC,NBOXESC
      INTEGER *4 I1(36),N1,IX1(36),IY1(36)
      INTEGER *4 I2(16),N2,IX2(16),IY2(16)
      INTEGER *4 I3(4),N3,IX3(4),IY3(4)
      INTEGER *4 I4(4),N4,IX4(4),IY4(4)
      INTEGER *4 I5(4),N5,IX5(4),IY5(4)
      INTEGER *4 I6(4),N6,IX6(4),IY6(4)
      INTEGER *4 I7(4),N7,IX7(4),IY7(4)
      INTEGER *4 I8(4),N8,IX8(4),IY8(4)
      INTEGER *4 ISTART,NBOXES,NINBOX,I,J,K,IOFF
      INTEGER *4 ILEV,MYBOX,IAT
      INTEGER *4 ICHILD(8)
c
      REAL *8 R0,SCALL,BETASCAL,TEST1,TEST2
      REAL *8 P(10000)
      REAL *8 ZERO
      REAL *8 SCALE(20),SHIFT(3)
c
      COMPLEX *16 IMAG
c
c-----timing variables.
c
      REAL *8 TIMEA,TIMEB,TIMEPRE
      REAL *8 TIME_INIT,TIME_UPPASS,TIME_TATA,TIME_UDEXP,TIME_PROCUD
      REAL *8 TIME_MKNSEXP,TIME_PROCNS,TIME_MKESEXP,TIME_PROCEW
      REAL *8 TIME_MKEWEXP,TSHIFTS,TMKEXPS
      REAL *8 TIME_EXPTOMP,TIME_LCEVAL
c
c-----functions called.
c
      REAL *8 SECOND
c
      DATA IMAG/(0.0D0,1.0D0)/
      DATA ZERO/0.0D0/
c
c-----initialize time counters:
c
      TIME_INIT = 0
      TIME_UPPASS = 0
      TIME_TATA	 = 0
      TIME_UDEXP = 0
      TIME_PROCUD = 0
      TIME_MKNSEXP = 0
      TIME_PROCNS = 0
      TIME_MKEWEXP = 0
      TIME_PROCEW = 0
      TIME_EXPTOMP = 0
      TIME_LCEVAL = 0
c
c-----initialize multipole and local expansions to zero.
c
      TIMEB = SECOND()
      NALLBX = LADDER(NLEV+1)
      DO I = 1,NALLBX
        DO J = 0,NTERMS
          DO K = 0,NTERMS
            MPOLE(J,K,I) = ZERO
            LOCAL(J,K,I) = ZERO
          ENDDO
        ENDDO
      ENDDO
c
c-----initialize potential and field at all locations.
c
      DO I=1,NATOMS
        POT(I)=0.0D0
        FIELD(1,I)=0.0D0
        FIELD(2,I)=0.0D0
        FIELD(3,I)=0.0D0
      ENDDO
c
c-----set scale factors for all levels to avoid over- and under-flow
c       in computing special functions.
c
      IF (BETA. GT. 1.0D0) THEN
        SCALE(1)=1.0D0
      ELSE
        SCALE(1)=BETA
      ENDIF
      DO 100 I = 2,NLEV
        SCALE(I) = SCALE(I-1)/2.0D0
100   CONTINUE
c
c-----assign particles to boxes
c
      CALL ASSIGN(LADDER,NLEV,ZAT,IOFFST,IBOX,IATADR,
     1                   NATOMS,ICNT,ICNT2)
      TIMEA = SECOND()
      TIME_INIT = TIMEA - TIMEB
c
c=============================================================
c-----begin upward pass
c=============================================================
c
      CALL PRINF(' beginning upward pass *',NLEV,0)
c
c=======================================================================
c-----step 1: form multipoles at finest mesh level
c=======================================================================
c
      ISTART = LADDER(NLEV)
      NBOXES = LADDER(NLEV+1) - ISTART
      DO 300 J = 1,NBOXES
        MYBOX = ISTART + J
        IOFF = IOFFST(J)
        NINBOX = IOFFST(J+1) - IOFF
        IF ( NINBOX .LE. 0 ) GOTO 300
        DO 200 K = 1,NINBOX
          KAT = IATADR(IOFF+K)
          ZAT2(1,K) = ZAT(1,KAT)
          ZAT2(2,K) = ZAT(2,KAT)
          ZAT2(3,K) = ZAT(3,KAT)
          CHARG2(K)  = CHARGE(KAT)



200     CONTINUE
c=====================================================================
c    this subroutine forms the multipole expansion cause by the nparts
c      particles in the box. outputs coefficients
c=====================================================================
        CALL YFORMMP(BETA,CENTER(1,MYBOX),ZAT2,CHARG2,
     1    NINBOX,MPOLE(0,0,MYBOX),NTERMS,SCALE(NLEV),P, YTOP)
300   CONTINUE
c
c=======================================================================
c-----step 2: merge multipoles at all coarser mesh levels until
c             expansion for whole computational cell is obtained.
c             in the inner loop, the expansion for each box is
c             shifted to center of parent box.
c=======================================================================
c
      DO 800 ILEV = NLEV-1,1,-1
c
c-------first compute the coefficients for the shifting along the
c         z-axis, this should be done for different levels. and
c         the computation can be further simplified in the future.
c         first compute the distance from the children center to
c         the parent center.
c
        R0=DSQRT(3.0D0)/(2.0D0**(ILEV+1))
        CALL YMPSHFTCOEF(SCALE(ILEV+1), BETA, R0, NTERMS, DC, INFO)
c
        ISTART = LADDER(ILEV)
        NBOXES = LADDER(ILEV+1) - ISTART
c
c-------get all eight children expansions.
c
        DO 400 J = 1,NBOXES
          MYBOX = ISTART + J
          CALL MKCHILD(ILEV,LADDER,MYBOX,ICHILD)
          DO K = 1,4
            MYCHILD = ICHILD(K)
            IF (K.EQ.1) THEN
              IFL = 3
            ELSE IF (K.EQ.2) THEN
              IFL = 4
            ELSE IF (K.EQ.3) THEN
              IFL = 2
            ELSE
              IFL = 1
            ENDIF
            CALL YMPSHIFT(IFL,MPOLE(0,0,MYCHILD),MW1,MW2,
     1        NTERMS,DC,RDSQ3)
            CALL ADDEXP(MW1,MPOLE(0,0,MYBOX),NTERMS)
          ENDDO
c
          DO K = 5,8
            MYCHILD = ICHILD(K)
            IF (K.EQ.5) THEN
              IFL = 3
            ELSE IF (K.EQ.6) THEN
              IFL = 4
            ELSE IF (K.EQ.7) THEN
              IFL = 2
            ELSE
              IFL = 1
            ENDIF
            CALL YMPSHIFT(IFL,MPOLE(0,0,MYCHILD),MW1,MW2,
     1        NTERMS,DC,RDMSQ3)
c
c  purpose: adds one vector to another.
c
c  note: this can be replaced by future blast subroutines.
c
c  on input: expansions a and b of size (0:nterms,0:nterms)
c
c  on output: a is over written by (a+b).
c

            CALL ADDEXP(MW1,MPOLE(0,0,MYBOX),NTERMS)
          ENDDO
400     CONTINUE
800   CONTINUE
c
      TIMEB = SECOND()
      TIME_UPPASS = TIMEB - TIMEA
c      call prin2(' time for upward pass *',timeb-timea,1)
c
c======================================================================d
c
c-----upward pass complete
c       all multipole expansions are available.
c
c     begin downward pass
c       at each level, send local expansion to children.
c
c=======================================================================
c-----step 3: compute interaction list for all boxes and send resulting
c             local expansions to children.
c=======================================================================
c
      DO 2000 ILEV = 1,NLEV-1
        CALL PRINF(' downward pass, ilev = *',ILEV,1)
        TIMEA = SECOND()
c
c-------precomp the shifting translation coefficients along the z-zxis.
c
        R0=DSQRT(3.0D0)/(2.0D0**(ILEV+1))
        CALL YLCSHFTCOEF(SCALE(ILEV), BETA, R0, NTERMS, DC, INFO)
c
        ISTART = LADDER(ILEV)
        NBOXES = LADDER(ILEV+1) - ISTART
        DO 1000 J = 1,NBOXES
          MYBOX = ISTART+J
          CALL MKCHILD(ILEV,LADDER,MYBOX,ICHILD)
          DO K = 1,4
            MYCHILD = ICHILD(K)
            IF (K.EQ.1) THEN
              IFL = 3
            ELSE IF (K.EQ.2) THEN
              IFL = 4
            ELSE IF (K.EQ.3) THEN
              IFL = 2
            ELSE
              IFL = 1
            ENDIF
            CALL YLCSHIFT(IFL,LOCAL(0,0,MYBOX),MW1,MW2,
     1        NTERMS,DC,RDMSQ3)
            CALL ADDEXP(MW1,LOCAL(0,0,MYCHILD),NTERMS)
          ENDDO
c
          DO K = 5,8
            MYCHILD = ICHILD(K)
            IF (K.EQ.5) THEN
              IFL = 3
            ELSE IF (K.EQ.6) THEN
              IFL = 4
            ELSE IF (K.EQ.7) THEN
              IFL = 2
            ELSE
              IFL = 1
            ENDIF
            CALL YLCSHIFT(IFL,LOCAL(0,0,MYBOX),MW1,MW2,
     1        NTERMS,DC,RDSQ3)
            CALL ADDEXP(MW1,LOCAL(0,0,MYCHILD),NTERMS)
          ENDDO
1000    CONTINUE
c
        TIMEB = SECOND()
        TIME_TATA = TIME_TATA + (TIMEB - TIMEA)
c
c-------now calculate the mp-pw-local, first do the precomputing.
c       we need
c         1. rlsc(): the p_n^m() for different lambda_k at different levels.
c         2. xs, ys, zs: the diagonal translation coefficients.
c
c-------the scaled beta.
c
        SCALL=SCALE(ILEV+1)
        BETASCAL=BETA*2.0D0**(-ILEV)
c
c-------now recompute the number of physical modes necessary for
c         the inner intergral. note that this should be improved
c         in future versions.
c
        CALL NUMTHETAFOUR(NUMPHYS,NLAMBS)
        DO I=1, NLAMBS
          TEST1= RLAMS(I)
          TEST2= DSQRT(TEST1*TEST1+2.0D0*TEST1*BETASCAL)
          INDD=I
          MMAX=NUMPHYS(I)
          DO JJ=I, NLAMBS
            IF (TEST2 .LE. RLAMS(JJ) ) THEN
              INDD=JJ
              GOTO 1001
            ELSE
              MMAX=MAX(NUMPHYS(JJ), MMAX)
            ENDIF
          ENDDO
1001      NUMPHYS(I)=MAX(MMAX, NUMPHYS(INDD))
        ENDDO
c
c        call prinf(' called numthetafour *',natoms,1)
c        call prinf(' numphys is*',numphys,nlambs)
c
        NEXPTOT = 0
        NTHMAX = 0
        NEXPTOTP = 0
        DO I = 1,NLAMBS
          NEXPTOT = NEXPTOT + NUMFOUR(I)
          IF (NUMFOUR(I).GT.NTHMAX) NTHMAX = NUMFOUR(I)
          NEXPTOTP = NEXPTOTP + NUMPHYS(I)
        ENDDO
        NEXPTOTP = NEXPTOTP/2
c        call prinf('nexptotp = *',nexptotp,1 )
        IF (NEXPTOTP .GT. MNEXPTOTP ) THEN
          CALL PRINF('error in (m)nexptotp, *', MNEXPTOTP, 1)
          STOP
        ENDIF
c
        CALL YMKFEXP(NLAMBS,NUMFOUR,NUMPHYS,FEXPE,FEXPO,FEXPBACK)
c
c-------rlscini() returns the scaled p_n^m()*sc^n, and mkexps()
c       returns the coefficients for the shifting in xs, ys, zs.
c
        CALL YRLSCINI(SCALL,BETASCAL,RLSC, NLAMBS, RLAMS, NTERMS)
        CALL YMKEXPS(BETASCAL,RLAMS,NLAMBS,NUMPHYS,NEXPTOTP,XS,YS,ZS)
c
        TIMEPRE=SECOND()
        WRITE(11, *) 'precomp time for current level', ILEV,
     1    TIMEPRE-TIMEB
c
c-------initialize the exponential expansions.
c
        ISTARTC = LADDER(ILEV+1)
        NBOXESC = LADDER(ILEV+2) - ISTARTC
        DO NBC = 1,NBOXESC
          MBC = ISTARTC + NBC
          DO JJ = 1,NEXPTOTP
            LEXP1(JJ,MBC) = 0.0D0
            LEXP2(JJ,MBC) = 0.0D0
          ENDDO
        ENDDO
c
        DO 1200 J = 1,NBOXES
          MYBOX = ISTART+J
          TIMEA = SECOND()
          CALL YMKUDEXP(J,LADDER,ILEV,NTERMS,MPOLE,RLAMS,NLAMBS,
     1      NUMFOUR,NUMPHYS,NTHMAX,NEXPTOT,NEXPTOTP,MEXPF1,
     2      MEXPF2,MEXPP1,MEXPP2,MEXPPALL(1,1),MEXPPALL(1,2),
     3      MEXPPALL(1,3),MEXPPALL(1,4),XS,YS,ZS,
     4      FEXPE,FEXPO,RLSC)
c
          TIMEB = SECOND()
          TIME_UDEXP = TIME_UDEXP + (TIMEB-TIMEA)
c
          TIMEA = SECOND()
          CALL MKUPLIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2)
          CALL YPROCESSUP(SCALE(ILEV+1),LEXP1,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,MEXPPALL(1,1),MEXPPALL(1,2),XS,YS,ZS,NEXPTOTP,
     2      MNEXPTOTP)
          CALL MKDNLIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2)
          CALL YPROCESSDN(SCALE(ILEV+1),LEXP2,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,MEXPPALL(1,3),MEXPPALL(1,4),XS,YS,ZS,NEXPTOTP,
     2      MNEXPTOTP)
          TIMEB = SECOND()
          TIME_PROCUD = TIME_PROCUD + (TIMEB-TIMEA)
1200    CONTINUE
c
c-------convert lexp1 and lexp2 to local multipole expansion.
c
        TIMEA=SECOND()
        DO 1300 J = 1,NBOXESC
          MYBOX = ISTARTC+J
          CALL YPHYSTOF(MEXPF1,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP1(1,MYBOX),FEXPBACK)
          CALL YPHYSTOF(MEXPF2,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP2(1,MYBOX),FEXPBACK)
          CALL YEXPTOLOCAL(BETASCAL,RLSC,MW1,NTERMS,RLAMS,WHTS,NLAMBS,
     1      YTOP, NUMFOUR,NTHMAX,NEXPTOT,MEXPF1,MEXPF2,SCALE(ILEV+1))
          CALL ADDEXP(MW1,LOCAL(0,0,MYBOX),NTERMS)
1300    CONTINUE
c
        TIMEB = SECOND()
        TIME_EXPTOMP = TIME_EXPTOMP + (TIMEB-TIMEA)
c
c-------next process north and south lists
c
        DO NBC = 1,NBOXESC
          MBC = ISTARTC + NBC
          DO JJ = 1,NEXPTOTP
            LEXP1(JJ,MBC) = 0.0D0
            LEXP2(JJ,MBC) = 0.0D0
          ENDDO
        ENDDO
c
        DO 1400 J = 1,NBOXES
          MYBOX = ISTART+J
          TIMEA = SECOND()
          CALL YMKNSEXP(J,LADDER,ILEV,NTERMS,MPOLE,MW1,MW2,
     1      RLAMS,NLAMBS,NUMFOUR,NUMPHYS,NTHMAX,NEXPTOT,NEXPTOTP,
     2      MEXPF1,MEXPF2,MEXPP1,MEXPP2,RDMINUS,MEXPPALL(1,1),
     3      MEXPPALL(1,2),MEXPPALL(1,3),MEXPPALL(1,4),
     4      MEXPPALL(1,5),MEXPPALL(1,6),MEXPPALL(1,7),
     5      MEXPPALL(1,8),XS,YS,ZS,
     6      FEXPE,FEXPO,RLSC)
c
          TIMEB = SECOND()
          TIME_MKNSEXP = TIME_MKNSEXP + (TIMEB-TIMEA)
          TIMEA = SECOND()
c
          CALL MKNOLIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4)
          CALL YPROCESSNO(SCALE(ILEV+1),LEXP1,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,
     2      MEXPPALL(1,1),MEXPPALL(1,2),MEXPPALL(1,3),
     3      MEXPPALL(1,4),XS,YS,ZS,NEXPTOTP,MNEXPTOTP)
c
          CALL MKSOLIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4)
          CALL YPROCESSSO(SCALE(ILEV+1),LEXP2,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,
     2      MEXPPALL(1,5),MEXPPALL(1,6),MEXPPALL(1,7),
     3      MEXPPALL(1,8),XS,YS,ZS,NEXPTOTP,MNEXPTOTP)
c
          TIMEB = SECOND()
          TIME_PROCNS = TIME_PROCNS + (TIMEB-TIMEA)
1400    CONTINUE
c
c-------convert lexp1 and lexp2 to local multipole expansion.
c
        TIMEA = SECOND()
c
        DO 1500 J = 1,NBOXESC
          MYBOX = ISTARTC+J
c
          CALL YPHYSTOF(MEXPF1,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP1(1,MYBOX),FEXPBACK)
          CALL YPHYSTOF(MEXPF2,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP2(1,MYBOX),FEXPBACK)
          CALL YEXPTOLOCAL(BETASCAL,RLSC,MW1,NTERMS,RLAMS,WHTS,NLAMBS,
     1      YTOP, NUMFOUR,NTHMAX,NEXPTOT,MEXPF1,MEXPF2,SCALE(ILEV+1))
          CALL ROTYTOZ(NTERMS,MW1,MW3,MW2,RDPLUS)
          CALL ADDEXP(MW2,LOCAL(0,0,MYBOX),NTERMS)
1500    CONTINUE
c
        TIMEB = SECOND()
        TIME_EXPTOMP = TIME_EXPTOMP + (TIMEB-TIMEA)
c
c-------next process east and west lists
c
        DO NBC = 1,NBOXESC
          MBC = ISTARTC + NBC
          DO JJ = 1,NEXPTOTP
            LEXP1(JJ,MBC) = 0.0D0
            LEXP2(JJ,MBC) = 0.0D0
          ENDDO
        ENDDO
c
        DO 1600 J = 1,NBOXES
          MYBOX = ISTART+J
          TIMEA = SECOND()
c
          CALL YMKEWEXP(J,LADDER,ILEV,NTERMS,MPOLE,MW1,RLAMS,
     1      NLAMBS,NUMFOUR,NUMPHYS,NTHMAX,NEXPTOT,NEXPTOTP,
     2      MEXPF1,MEXPF2,MEXPP1,MEXPP2,RDPLUS,MEXPPALL(1,1),
     3      MEXPPALL(1,2),MEXPPALL(1,3),MEXPPALL(1,4),
     4      MEXPPALL(1,5),MEXPPALL(1,6),MEXPPALL(1,7),
     5      MEXPPALL(1,8),MEXPPALL(1,9),MEXPPALL(1,10),
     6      MEXPPALL(1,11),MEXPPALL(1,12),MEXPPALL(1,13),
     7      MEXPPALL(1,14),MEXPPALL(1,15),MEXPPALL(1,16),
     8      XS,YS,ZS,FEXPE,FEXPO,RLSC)
c
          TIMEB = SECOND()
          TIME_MKEWEXP = TIME_MKEWEXP+ (TIMEB-TIMEA)
          TIMEA = SECOND()
c
          CALL MKEALIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,
     2      I5,N5,IX5,IY5,I6,N6,IX6,IY6,I7,N7,IX7,IY7,
     3      I8,N8,IX8,IY8)
          CALL YPROCESSEA(SCALE(ILEV+1),LEXP1,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,I5,N5,IX5,IY5,
     2      I6,N6,IX6,IY6,I7,N7,IX7,IY7,I8,N8,IX8,IY8,
     3      MEXPPALL(1,1),MEXPPALL(1,2),MEXPPALL(1,3),
     4      MEXPPALL(1,4),MEXPPALL(1,5),MEXPPALL(1,6),
     5      MEXPPALL(1,7),MEXPPALL(1,8),XS,YS,ZS,NEXPTOTP,
     6      MNEXPTOTP)
c
          CALL MKWELIST(ILEV,LADDER,MYBOX,I1,N1,IX1,IY1,
     1      I2,N2,IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,
     2      I5,N5,IX5,IY5,I6,N6,IX6,IY6,I7,N7,IX7,IY7,
     3      I8,N8,IX8,IY8)
          CALL YPROCESSWE(SCALE(ILEV+1),LEXP2,I1,N1,IX1,IY1,I2,N2,
     1      IX2,IY2,I3,N3,IX3,IY3,I4,N4,IX4,IY4,I5,N5,IX5,IY5,
     2      I6,N6,IX6,IY6,I7,N7,IX7,IY7,I8,N8,IX8,IY8,
     3      MEXPPALL(1,9),MEXPPALL(1,10),MEXPPALL(1,11),
     4      MEXPPALL(1,12),MEXPPALL(1,13),MEXPPALL(1,14),
     5      MEXPPALL(1,15),MEXPPALL(1,16),XS,YS,ZS,NEXPTOTP,
     6      MNEXPTOTP)
c
          TIMEB = SECOND()
          TIME_PROCEW = TIME_PROCEW + (TIMEB-TIMEA)
1600    CONTINUE
c
c-------convert lexp1 and lexp2 to local multipole expansion.
c
        TIMEA = SECOND()
c
        DO 1700 J = 1,NBOXESC
          MYBOX = ISTARTC+J
          CALL YPHYSTOF(MEXPF1,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP1(1,MYBOX),FEXPBACK)
          CALL YPHYSTOF(MEXPF2,NLAMBS,RLAMS,NUMFOUR,NUMPHYS,
     1      NTHMAX,LEXP2(1,MYBOX),FEXPBACK)
          CALL YEXPTOLOCAL(BETASCAL,RLSC,MW1,NTERMS,RLAMS,WHTS,NLAMBS,
     1      YTOP, NUMFOUR,NTHMAX,NEXPTOT,MEXPF1,MEXPF2,SCALE(ILEV+1))
          CALL ROTZTOX(NTERMS,MW1,MW2,RDMINUS)
          CALL ADDEXP(MW2,LOCAL(0,0,MYBOX),NTERMS)
1700    CONTINUE
c
        TIMEB = SECOND()
        TIME_EXPTOMP = TIME_EXPTOMP + (TIMEB-TIMEA)
c
2000  CONTINUE
c
c======================================================================d
c     downward pass complete
c     local expansions at finest grid level are available.
c     they will generate the field due to all particles in the plane
c     excluding the neighboring boxes.
c======================================================================d
c
c     step 5: find box containing each atom and compute local expansion
c             at corresponding position
c
      TIMEA = SECOND()
      ISTART  =  LADDER(NLEV)
      NBOXES = LADDER(NLEV+1) - ISTART
      DO 2950 I = 1,NBOXES
        IOFF = IOFFST(I)
        NINBOX = IOFFST(I+1) - IOFF
        IF (NINBOX .EQ. 0) GOTO 2950
        MYBOX = ISTART + I
        DO 2900 J = 1,NINBOX
          IAT = IATADR(IOFF+J)
          SHIFT(1) = ZAT(1,IAT)
          SHIFT(2) = ZAT(2,IAT)
          SHIFT(3) = ZAT(3,IAT)
c
c   SOS SOS SOS I= 1, NBOXES
c   OMWS IAT ==size
c
c


c  YBRTAEV
c    evaluates local expansion at arbitrary point.
c
c  on input:
c
c    beta : the frequency of the equation.
c    local: coefficients of local expansion(scaled)
c    x0y0z0: the center of the expansion
c    point: point of evaluation
c    nterms: order of expansion
c    p: work arrays to hold legendre polynomials
c       and associated legendre functions, respectively.
c
c  on output:
c    rpot: computed potential

          CALL YBRTAEV(BETA,LOCAL(0,0,MYBOX),CENTER(1,MYBOX),SHIFT,
     1      NTERMS,POT(IAT),FIELD(1,IAT),SCALE(NLEV),P)

2900    CONTINUE
2950  CONTINUE
      TIMEB = SECOND()
      TIME_LCEVAL = TIME_LCEVAL + (TIMEB-TIMEA)

c
c
c
c
c
      OPEN (UNIT = 10 , FILE = 'points_in_space', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*3*2000)
      OPEN (UNIT = 11 , FILE = 'm_potential', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*2000) 
      OPEN (UNIT = 12 , FILE = 'm_field', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*3*2000) 
      OPEN (UNIT = 13 , FILE = 'm_test_output')	


      WRITE( 10 , rec = 1 ) ZAT
      WRITE( 11 , rec = 1 ) POT
      WRITE( 12 , rec = 1 ) FIELD
      CLOSE (12)
      CLOSE (11)
      CLOSE (10)

      DO 2951 I = 1, NATOMS            
      WRITE (13,*) ZAT(1,I) ,ZAT(2,I) ,ZAT(3,I) ,POT(I) ,FIELD(1,I),
     1       FIELD(2,I), FIELD(3,I) 
2951  continue

      CLOSE(13)

c      DO 2952 I = 1 , IAT
c      WRITE( 5 , rec = 1 ) POT( I ) 
c      WRITE( 4 , rec = 1 ) FIELD( I )
c2952  continue      


c
c-----output computation time.
c
c      call prin2(' init time (assign etc.) is *',time_init,1)
c      call prin2(' upward pass time is *',time_uppass,1)
c      call prin2(' udexp time is *',time_udexp,1)
c      call prin2(' processud time is *',time_procud,1)
c      call prin2(' nsexp time is *',time_mknsexp,1)
c      call prin2(' processns time is *',time_procns,1)
c      call prin2(' ewexp time is *',time_mkewexp,1)
c      call prin2(' processew time is *',time_procew,1)
c      call prin2(' time for all tatas is *',time_tata,1)
c      call prin2(' exptomp time is *',time_exptomp,1)
c      call prin2(' lceval time is *',time_lceval,1)
      TSHIFTS = TIME_PROCUD + TIME_PROCNS + TIME_PROCEW
      TMKEXPS = TIME_UDEXP + TIME_MKNSEXP + TIME_MKEWEXP
c
c-----write execution time to a file.
c
      CALL WRITETIMES(NATOMS,NLEV,NTERMS,TIME_UPPASS,TMKEXPS,
     1  TSHIFTS,TIME_EXPTOMP,TIME_TATA,TIME_LCEVAL)
c
      RETURN
      END
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
      SUBROUTINE YNBDIRECT(BETA,IFLAG,NLEV,ZAT,NATOMS,CHARGE,POT,FIELD,
     1  IOFFST,IATADR,LADDER,NBORS,CENTER)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
c  purpose:
c
c     computes interactions in nine nearest neighbors directly.
c
c     iflag = 0 for free-space problems
c     iflag = 1 for periodic problems (not yet implemented)
c
c  on input :
c    beta: the frequency.
c    iflag: for boundary conditions. the periodic case will be implemented
c           in future release.
c    nlev : total number of levels.
c    zat: particle locations
c    natoms: total number of particles.
c    charge: particle charges
c
c  on output:
c    pot : potential
c    field : force field (gradient of the potential)
c
c  note: error checking may be necessary in the future.
c
c  functions called: datan,dsqrt,dexp
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccd
c
      IMPLICIT NONE
c
      INTEGER *4 IFLAG,NLEV,NATOMS,IOFFST(1),IATADR(1)
      INTEGER *4 LADDER(1)
      INTEGER *4 NBORS(1)
c
      REAL *8 BETA
      REAL *8 ZAT(3,NATOMS),CHARGE(NATOMS)
      REAL *8 CENTER(3,1),POT(NATOMS)
      REAL *8 FIELD(3,NATOMS)
c
c-----local variables
c
      INTEGER *4 NNBORS,KBOX,NINNBR
      INTEGER *4 ISTART,NBOXES,NINBOX,I,J,K,IOFF
      INTEGER *4 ILOW,JAT,IAT
      INTEGER *4 NCOUNT
      REAL *8 PI,EXPR
      REAL *8 RX,RY,RZ,RR
      REAL *8 RDIS,ZERO
      REAL *8 ZCNTRS(3,125)
      REAL *8 TERM1,TERM2
      COMPLEX *16 IMAG
c
      DATA IMAG/(0.0D0,1.0D0)/
      DATA ZERO/0.0D0/
c
      PI = 4.0D0*DATAN(1.0D0)
c
      ISTART = LADDER(NLEV)
      NBOXES = LADDER(NLEV+1) - ISTART
c
c-----loop through boxes
c
      DO 3000 I=1,NBOXES
        IOFF = IOFFST(I)
        NINBOX = IOFFST(I+1) - IOFF
        IF (NINBOX .EQ. 0) GOTO 3000
        CALL MKNBOR(I,NBORS,NNBORS,IFLAG,NLEV,LADDER,
     1    CENTER,ZCNTRS)
        DO 2400 K = 1,NNBORS
          KBOX = NBORS(K)
          ILOW = IOFFST(KBOX)
          NINNBR = IOFFST(KBOX+1) - ILOW
c
c---------skip box if it contains no particles
c
          IF (NINNBR .EQ. 0) GOTO 2400
          DO 2100 J = 1,NINBOX
            IAT = IATADR(IOFF+J)
            DO 2000 NCOUNT = 1,NINNBR
              JAT = IATADR(ILOW+NCOUNT)
              IF (IFLAG .EQ. 0) THEN
                IF ( IAT .EQ. JAT) GOTO 2000
                RX = ZAT(1,IAT) - ZAT(1,JAT)
                RY = ZAT(2,IAT) - ZAT(2,JAT)
                RZ = ZAT(3,IAT) - ZAT(3,JAT)
              ELSEIF (IFLAG .EQ. 1) THEN
                IF ((IAT .EQ. JAT).AND. (K.EQ.1)) GOTO 2000
                RX = ZAT(1,IAT) - ZAT(1,JAT) - ZCNTRS(1, K)
                RY = ZAT(2,IAT) - ZAT(2,JAT) - ZCNTRS(2, K)
                RZ = ZAT(3,IAT) - ZAT(3,JAT) - ZCNTRS(3, K)
              ENDIF
c
              RR =  RX*RX + RY*RY + RZ*RZ
              RDIS = DSQRT(RR)*BETA
              EXPR= DEXP(-RDIS)
              TERM1 = CHARGE(JAT)/RDIS*EXPR*PI/2.0D0
              POT(IAT) = POT(IAT) + TERM1
c
c-------------the field.
c
              TERM2=-TERM1*(1.0D0+RDIS)/RR
              FIELD(1,IAT)=FIELD(1,IAT)+RX*TERM2
              FIELD(2,IAT)=FIELD(2,IAT)+RY*TERM2
              FIELD(3,IAT)=FIELD(3,IAT)+RZ*TERM2
2000        CONTINUE
2100      CONTINUE
2400    CONTINUE
3000  CONTINUE
c
      OPEN (UNIT = 14 , FILE = 'points_in_space1', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*3*2000)
      OPEN (UNIT = 15 , FILE = 'final_potential', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*2000) 
      OPEN (UNIT = 16 , FILE = 'final_field', FORM = 'unformatted',
     1      access = 'direct' , recl = 8*3*2000) 
      OPEN (UNIT = 17 , FILE = 'final_test_output')	


      WRITE( 14 , rec = 1 ) ZAT
      WRITE( 15 , rec = 1 ) POT
      WRITE( 16 , rec = 1 ) FIELD
      CLOSE (16)
      CLOSE (15)
      CLOSE (14)

      DO 3001 I = 1, NATOMS            
      WRITE (17,*) ZAT(1,I) ,ZAT(2,I) ,ZAT(3,I) ,POT(I) ,FIELD(1,I),
     1       FIELD(2,I), FIELD(3,I) 
3001  continue

      CLOSE(17)







      RETURN
      END
c
