1. Compiling NAMD on SGI clusters
Check resources
> module load cuda
> nvcc --version
CUDA 9.1 or above is required.
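If a GPU node is reachable (for example through an interactive job), the driver and devices can also be checked directly; this assumes nvidia-smi is on the path of the GPU node:
> nvidia-smi --query-gpu=name,driver_version --format=csv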
Follow the steps below to compile NAMD.
- Download the latest source code from the NAMD website
> tar -xzf NAMD_2.14_Source.tar.gz
> cd NAMD_2.14_Source
> tar -xf charm-6.10.2.tar
> cd charm-6.10.2
- Set up the compilation environment
Check which modules are required for running Intel MPI:
> module list
> module avail
> module purge
> module load compiler/intel/2020.2.254
> module load compiler/intelmpi/2020.2.254
or
> module swap mpt compiler/intelmpi
- Build the non-SMP and SMP versions of charm++
> ./build charm++ ofi-linux-x86_64 icc ifort -j16 --with-production
> ./build charm++ ofi-linux-x86_64 icc ifort smp -j16 --with-production
- Test the charm++ builds
Note that the previous binaries must be deleted before each new test.
> cd ofi-linux-x86_64-ifort-smp-icc/tests/charm++
> make clean && make && make test
> cd ../../..
> cd ofi-linux-x86_64-ifort-icc/tests/charm++
> make clean && make && make test
> cd ../../..
> cd ofi-linux-x86_64-ifort-smp-icc/tests/charm++
> make clean && make
> cd megatest && make && make test
> cd ../../../../..
- Download tcl and fftw
> wget http://www.ks.uiuc.edu/Research/namd/libraries/fftw-linux-x86_64.tar.gz
> wget http://www.ks.uiuc.edu/Research/namd/libraries/tcl8.5.9-linux-x86_64.tar.gz
> wget http://www.ks.uiuc.edu/Research/namd/libraries/tcl8.5.9-linux-x86_64-threaded.tar.gz
> tar xzf fftw-linux-x86_64.tar.gz
> mv linux-x86_64 fftw
> tar xzf tcl8.5.9-linux-x86_64.tar.gz
> mv tcl8.5.9-linux-x86_64 tcl
> tar xzf tcl8.5.9-linux-x86_64-threaded.tar.gz
> mv tcl8.5.9-linux-x86_64-threaded tcl-threaded
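As an optional sanity check, the unpacked libraries should now sit in the NAMD source directory, each with include and lib subdirectories; the paths below assume the commands above were run inside NAMD_2.14_Source:
> ls fftw/include fftw/lib
> ls tcl/include tcl/lib tcl-threaded/lib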
- Create the NAMD arch files
> cat << EOF > arch/Linux-x86_64-ofi-icc.arch
NAMD_ARCH = Linux-x86_64
CHARMARCH = ofi-linux-x86_64-ifort-icc
FLOATOPTS = -ip -xSKYLAKE-AVX512 -qopenmp-simd
CXX = icpc -std=c++11
CXXOPTS = -O2 \$(FLOATOPTS)
CXXNOALIASOPTS = -O2 -fno-alias \$(FLOATOPTS)
CXXCOLVAROPTS = -O2 -ip
CC = icc
COPTS = -O2 \$(FLOATOPTS)
EOF
> cat << EOF > arch/Linux-x86_64-ofi-smp-icc.arch
NAMD_ARCH = Linux-x86_64
CHARMARCH = ofi-linux-x86_64-ifort-smp-icc
FLOATOPTS = -ip -xSKYLAKE-AVX512 -qopenmp-simd
CXX = icpc -std=c++11
CXXOPTS = -O2 \$(FLOATOPTS)
CXXNOALIASOPTS = -O2 -fno-alias \$(FLOATOPTS)
CXXCOLVAROPTS = -O2 -ip
CC = icc
COPTS = -O2 \$(FLOATOPTS)
EOF
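The backslashes in the here-documents only keep the shell from expanding $(FLOATOPTS) while the files are written; a quick grep should confirm that the literal variable reference ended up in both arch files:
> grep FLOATOPTS arch/Linux-x86_64-ofi-icc.arch arch/Linux-x86_64-ofi-smp-icc.arch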
- Compile NAMD
> ./config Linux-x86_64-ofi-icc --charm-arch ofi-linux-x86_64-ifort-icc
> ./config Linux-x86_64-ofi-smp-icc --charm-arch ofi-linux-x86_64-ifort-smp-icc
> cd Linux-x86_64-ofi-icc
> make -j16
> cd ..
> cd Linux-x86_64-ofi-smp-icc
> make -j16
> cd ..
> module load cuda/9.1
> ./config Linux-x86_64-ofi-smp-icc.cuda --charm-arch ofi-linux-x86_64-ifort-smp-icc --with-cuda
> cd Linux-x86_64-ofi-smp-icc.cuda
> make -j16
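After the three builds finish, each build directory should contain a namd2 binary; a quick check from the current (CUDA) build directory:
> ls -l ../Linux-x86_64-ofi-icc/namd2 ../Linux-x86_64-ofi-smp-icc/namd2 ./namd2
Functional tests are best run through the PBS script in section 3; the 66-atom src/alanin config shipped with the NAMD source is a convenient minimal input.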
2. Compiling NAMD on Cray XC/XE/XK
Check resources
> module load cudatoolkit
> nvcc --version
CUDA 9.1 or above is required.
Follow the steps below to compile NAMD.
- Download the latest source code as on SGI
- Set up the compilation environment
> module swap PrgEnv-cray PrgEnv-intel (gnu for XE/XK)
> module load rca
> module load craype-hugepages8M
> module load fftw
- Build the non-SMP and SMP versions of charm++
for XC
> ./build charm++ gni-crayxc persistent -j16 --with-production
> ./build charm++ gni-crayxc persistent smp -j16 --with-production
for XE/XK
> ./build charm++ gemini_gni-crayxe persistent -j16 --with-production
> ./build charm++ gemini_gni-crayxe persistent smp -j16 --with-production
- Test the charm++ builds as on SGI
- Download tcl and fftw as on SGI
- Compile NAMD
> ./config CRAY-XC-intel --charm-arch gni-crayxc-persistent --with-fftw3
> ./config CRAY-XC-intel.smp --charm-arch gni-crayxc-persistent-smp --with-fftw3
> cd CRAY-XC-intel
> make -j16
> cd ..
> cd CRAY-XC-intel.smp
> make -j16
> cd ..
> module load cudatoolkit
> nvcc --version
> ./config CRAY-XC-intel.cuda --charm-arch gni-crayxc-persistent-smp --with-cuda --with-fftw3
> cd CRAY-XC-intel.cuda
> make -j16
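On Cray systems the binaries must be launched on compute nodes with aprun; from an interactive allocation (obtained with qsub -I, options are site-specific) a minimal test of the non-SMP build would be the following sketch, where src/alanin is the small 66-atom test system shipped with the NAMD source:
> cd <path to NAMD_2.14_Source>
> aprun -n 1 ./CRAY-XC-intel/namd2 src/alanin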
3. PBS script to run NAMD on SGI
When running CUDA NAMD, always add +idlepoll to the command line. This is needed to poll the GPU for results rather than sleeping while idle. Here is the PBS script for job submission.
#!/bin/csh
#PBS -l select=4:ncpus=48:mpiprocs=48:ngpus=1
set EXEC=/p/home/kuangz/NAMD_2.14_Source/Linux-x86_64-ofi-smp-icc.cuda
set SCR=$PBS_O_WORKDIR
cd $SCR
module unload mpt
module swap compiler/intel/2018.3.222 compiler/intel/2020.0.1
module load compiler/intelmpi/2020.1.0
module load cuda/9.1
mpirun -np 4 -f $PBS_NODEFILE $EXEC/namd2 +ppn 47 +commap 0 +pemap 1-47 +idlepoll +ignoresharing +isomalloc_sync npt.conf > npt.log
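Assuming the script above is saved as npt.pbs (any name works), it can be submitted and monitored with:
> qsub npt.pbs
> qstat -u $USER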
4. PBS script to run NAMD on Cray
When running CUDA NAMD, always add +idlepoll to the command line. This is needed to poll the GPU for results rather than sleeping while idle. Here is the PBS script for job submission.
#!/bin/csh
#PBS -l select=4:ncpus=22:mpiprocs=22:ngpus=1
set SCR=$PBS_O_WORKDIR
cd $SCR
module swap PrgEnv-cray PrgEnv-intel
module load rca
module swap craype-hugepages2M craype-hugepages8M
module load fftw
module load cudatoolkit
setenv HUGETLB_DEFAULT_PAGE_SIZE 8M
setenv HUGETLB_MORECORE no
aprun -n 4 -N 1 -d 22 $EXEC +idlepoll +ppn 21 npt16.conf > npt16.log
5. For OmniPath network
I have built the GPU version using
./build charm++ verbs-linux-x86_64 icc smp --with-qlogic --with-production
module load cuda
./config Linux-x86_64-icc.cuda --charm-arch verbs-linux-x86_64-smp-icc --with-cuda --cuda-prefix /p/app/cuda/9.1
It can be launched by
set SCR=$PBS_O_WORKDIR
set EXEC=/p/home/kuangz/NAMD_2.14_Source/Linux-x86_64-icc.cuda
cd $SCR
module load cuda
set NODES = `cat $PBS_NODEFILE`
set NODELIST = $SCR/namd2.nodelist
echo "group main" >! $NODELIST
foreach node ( $NODES )
echo "host $node" >> $NODELIST
end
@ npes = 2 * 2 * 23
$EXEC/charmrun ++p $npes ++ppn 23 ++nodelist $NODELIST $EXEC/namd2 +isomalloc_sync +setcpuaffinity +pemap 1-23,25-47 +commap 0,24 +idlepoll +ignoresharing stmv.namd > stmv.log1
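The line `@ npes = 2 * 2 * 23` hard-codes 2 nodes with 2 processes of 23 worker threads each; a small sketch that derives the node count from $PBS_NODEFILE instead (same assumption of 48-core nodes with cores 0 and 24 reserved for communication threads):
set NNODES = `sort -u $PBS_NODEFILE | wc -l`
@ npes = $NNODES * 2 * 23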
I have also built the MPI version.
- For a single node
setenv CC icc ; setenv CXX icpc ; setenv F90 ifort ; setenv F77 ifort
./build charm++ multicore-linux64 iccstatic --with-production "-O3 -ip -xCORE-AVX512 -qopt-zmm-usage=high"
./config Linux-x86_64-ofi-icc.single --charm-arch multicore-linux64-iccstatic --with-mkl
cd Linux-x86_64-ofi-icc.single
make -j4
namd2 +p48 +setcpuaffinity stmv.namd > test1.log
- For multiple nodes
module purge
module load compiler/intel/2019.3.199
module load compiler/intelmpi/2019.3.199
setenv CC icc ; setenv CXX icpc ; setenv F90 ifort ; setenv F77 ifort
setenv MPICXX mpiicpc; setenv MPI_CXX mpiicpc; setenv I_MPI_CC icc; setenv I_MPI_CXX icpc; setenv I_MPI_F90 ifort; setenv I_MPI_F77 ifort
./build charm++ mpi-linux-x86_64 smp mpicxx --with-production "-O3 -ip -xCORE-AVX512" -DCMK_OPTIMIZE -DMPICH_IGNORE_CXX_SEEK
./config Linux-x86_64-ofi-smp-icc.mpi --charm-arch mpi-linux-x86_64-smp-mpicxx --with-mkl
cd Linux-x86_64-ofi-smp-icc.mpi
make -j4
It can be launched by
set NODES=2
set NPPN=2
@ NMPI = $NODES * $NPPN
mpirun -ppn $NPPN -f $PBS_NODEFILE -np $NMPI $EXEC +ppn 23 +commap 0,24 +pemap 1-23,25-47 stmv.namd > stmv.mpi
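$EXEC is not defined in the fragment above; for the MPI build it should point at the namd2 binary itself, for example (the path shown is only a placeholder):
set EXEC = /path/to/NAMD_2.14_Source/Linux-x86_64-ofi-smp-icc.mpi/namd2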
6. For InfiniBand network
Either of the following charm++ builds works (the second also builds the Fortran interface and uses a parallel make):
./build charm++ verbs-linux-x86_64 icc smp --with-production
./build charm++ verbs-linux-x86_64 icc smp ifort -j8 --with-production
./config Linux-x86_64-icc --charm-arch verbs-linux-x86_64-smp-icc --with-cuda --cuda-prefix /usr/local/cuda
It can be launched by
set NODES = `cat $PBS_NODEFILE`
set NODELIST = $SCR/namd2.nodelist
echo "group main" >! $NODELIST
foreach node ( $NODES )
echo "host $node" >> $NODELIST
end
$EXEC/charmrun ++p 152 ++ppn 19 ++nodelist $NODELIST $EXEC/namd2 +setcpuaffinity +pemap 1-19,21-39 +commap 0,20 +idlepoll +ignoresharing npt21.conf > npt21.log