Skip to content

Commit

Permalink
added MPI config and slurm files for OLCF Frontier
Browse files Browse the repository at this point in the history
  • Loading branch information
pjmaechling committed Aug 3, 2023
1 parent 128c12c commit b655214
Show file tree
Hide file tree
Showing 11 changed files with 334 additions and 0 deletions.
15 changes: 15 additions & 0 deletions examples/mpi/frontier/basin_test/basin_test.sl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Slurm batch script: UCVM basin_query_mpi test on OLCF Frontier.
#
# Steps:
#   1. cd into the member work area and stage basin_test_compare.sh there.
#   2. Run basin_query_mpi (1 node, 2 tasks) for the cvms5 model, writing
#      basin_test.simple in the current directory.
#   3. Run basin_test_compare.sh (1 task) in the same directory.
#
# Required environment: MEMBERWORK, PROJWORK.
#
#SBATCH -A geo112
#SBATCH -J basin_test
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --mail-type=ALL
# NOTE(review): the next directive looks like a scrubbed
# "--mail-user=<address>" option; restore the real address before submitting.
#SBATCH [email protected]

proj_root="${PROJWORK:?PROJWORK must be set}/geo112/pmaech"
ucvm_dir="${proj_root}/ucvm227"

# Abort if the work directory is unavailable; otherwise every later step
# would run in whatever directory the job happened to start in.
cd "${MEMBERWORK:?MEMBERWORK must be set}/geo112/ucvm" || exit 1
cp "${proj_root}/test_ucvm/basin_test_compare.sh" ./basin_test_compare.sh || exit 1

# The whole argument list must stay one logical command (note the backslash
# continuations): a bare line break would make the shell try to execute
# "-v" as a separate command and drop the -b output option.
srun -N1 -n 2 "${ucvm_dir}/bin/basin_query_mpi" \
  -f "${ucvm_dir}/conf/ucvm.conf" \
  -m cvms5 -i 20 -v 2500 -l 35.0,-122.5 -s 0.1 -x 16 -y 11 \
  -b basin_test.simple
#cp basin_test.simple $PROJWORK/geo112/pmaech/test_ucvm/basin_test.simple
srun -N1 -n1 ./basin_test_compare.sh
64 changes: 64 additions & 0 deletions examples/mpi/frontier/basin_test/basin_test_compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
##
## Test case to run on Frontier
##
## Dumps the binary basin file ./basin_test.simple with od(1), compares the
## dump against the expected dump embedded below, and prints
##   [SUCCESS] followed by 0   when the two dumps are identical, or
##   [FAILURE] followed by -1  when they differ.
## Both scratch text files are removed when the script exits.
##

# Install the cleanup before any scratch file is created so the files are
# removed on every exit path, not only when the last line is reached.
trap 'rm -f "basin_test_expected.txt" "basin_test_results.txt"' EXIT

# od's stderr is captured as well, so a missing or unreadable input file
# shows up as a mismatch in the comparison below.
od ./basin_test.simple > ./basin_test_results.txt 2>&1

# Quoted delimiter: the reference dump is literal text and is never expanded.
cat > ./basin_test_expected.txt << 'EOF_EXPECTED_RESULT'
0000000 000000 137600 000000 137600 000000 137600 000000 000000
0000020 000000 000000 000000 000000 000000 000000 000000 000000
*
0000100 000000 137600 000000 137600 000000 137600 000000 137600
0000120 000000 000000 000000 000000 000000 000000 000000 000000
*
0000200 000000 137600 000000 137600 000000 137600 000000 137600
0000220 000000 137600 000000 000000 000000 000000 000000 000000
0000240 000000 000000 000000 000000 000000 000000 000000 000000
*
0000300 000000 137600 000000 137600 000000 137600 000000 137600
0000320 000000 137600 000000 137600 000000 000000 000000 000000
0000340 000000 000000 000000 000000 000000 000000 000000 000000
*
0000400 000000 137600 000000 137600 000000 137600 000000 137600
0000420 000000 137600 000000 137600 000000 137600 000000 000000
0000440 000000 000000 000000 000000 000000 000000 000000 000000
*
0000500 000000 137600 000000 137600 000000 137600 000000 137600
*
0000540 000000 000000 000000 000000 000000 000000 000000 000000
*
0000600 000000 137600 000000 137600 000000 137600 000000 137600
*
0000640 000000 137600 000000 000000 000000 000000 000000 000000
0000660 000000 000000 000000 000000 000000 000000 000000 000000
0000700 000000 137600 000000 137600 000000 137600 000000 137600
*
0000740 000000 137600 000000 137600 000000 000000 000000 000000
0000760 000000 000000 000000 000000 000000 000000 000000 000000
0001000 000000 137600 000000 137600 000000 137600 000000 137600
*
0001040 000000 137600 000000 137600 000000 137600 000000 000000
0001060 000000 000000 000000 000000 000000 000000 000000 000000
0001100 000000 137600 000000 137600 000000 137600 000000 137600
*
0001160 000000 000000 000000 000000 000000 000000 000000 000000
0001200 000000 137600 000000 137600 000000 137600 000000 137600
*
0001260 000000 137600 000000 000000 000000 000000 000000 000000
0001300
EOF_EXPECTED_RESULT

echo "Running frontier basin test basin_test for cvms5"
if diff basin_test_expected.txt basin_test_results.txt > /dev/null 2>&1; then
  # Quoted: an unquoted [SUCCESS] is a glob pattern and would be replaced by
  # a matching one-character file name if one exists in this directory.
  echo "[SUCCESS]"
  echo "0"
else
  echo "[FAILURE]"
  echo "-1"
fi
76 changes: 76 additions & 0 deletions examples/mpi/frontier/examples/run_basin_query_mpi_cvms5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
##
## example of running on usc/hpc cluster
##
## Runs basin_query_mpi for the cvms5 model through salloc/srun (1 node,
## 2 tasks), dumps the resulting ${TEST}.simple with od(1), compares the dump
## with the expected dump embedded below and prints [SUCCESS] or [FAILURE].
##
## Environment:
##   UCVM_INSTALL_PATH  required; its conf/ucvm_env.sh is sourced and its
##                      bin/ and conf/ directories are used
##   UCVM_SALLOC_ENV    optional; inserted as extra salloc arguments
##
## Exit status: 1 if UCVM_INSTALL_PATH is unset or a temp file cannot be
## created.
##

if [ -z "$UCVM_INSTALL_PATH" ]; then
  echo "Need to set UCVM_INSTALL_PATH to run >" "${0##*/}"
  # Explicit non-zero status: a bare "exit" would report success here.
  exit 1
fi
source "$UCVM_INSTALL_PATH/conf/ucvm_env.sh"

BIN_DIR=${UCVM_INSTALL_PATH}/bin
CONF_DIR=${UCVM_INSTALL_PATH}/conf
TEST=basin_query_mpi_cvms5

# Scratch files for the expected and the actual od dump.  The cleanup trap is
# installed before the first file is created so that both files are removed
# on every exit path, including a failure of the second mktemp.
expect=
result=
cleanup() {
  if [ -n "$expect" ]; then rm -f -- "$expect"; fi
  if [ -n "$result" ]; then rm -f -- "$result"; fi
}
trap cleanup EXIT

expect=$(mktemp) || exit 1
result=$(mktemp) || exit 1

# One logical command (backslash continuations): the -b argument must stay
# joined as "${TEST}.simple".
# ${UCVM_SALLOC_ENV} is deliberately left unquoted so that an empty value adds
# no argument and a multi-word value is split into separate salloc options.
salloc ${UCVM_SALLOC_ENV} -Q --nodes=1 --ntasks=2 --time=00:10:00 \
  srun -Q -o "${TEST}.srun.out" "${BIN_DIR}/basin_query_mpi" \
  -b "${TEST}.simple" -f "${CONF_DIR}/ucvm.conf" \
  -m cvms5 -i 20 -v 2500 -l 35.0,-122.5 -s 0.1 -x 16 -y 11

# od's stderr is captured too, so a missing output file becomes a mismatch.
od "${TEST}.simple" > "$result" 2>&1

# Quoted delimiter: the reference dump is literal text and is never expanded.
cat > "$expect" << 'EOF_EXPECTED_RESULT'
0000000 000000 137600 000000 137600 000000 137600 000000 000000
0000020 000000 000000 000000 000000 000000 000000 000000 000000
*
0000100 000000 137600 000000 137600 000000 137600 000000 137600
0000120 000000 000000 000000 000000 000000 000000 000000 000000
*
0000200 000000 137600 000000 137600 000000 137600 000000 137600
0000220 000000 137600 000000 000000 000000 000000 000000 000000
0000240 000000 000000 000000 000000 000000 000000 000000 000000
*
0000300 000000 137600 000000 137600 000000 137600 000000 137600
0000320 000000 137600 000000 137600 000000 000000 000000 000000
0000340 000000 000000 000000 000000 000000 000000 000000 000000
*
0000400 000000 137600 000000 137600 000000 137600 000000 137600
0000420 000000 137600 000000 137600 000000 137600 000000 000000
0000440 000000 000000 000000 000000 000000 000000 000000 000000
*
0000500 000000 137600 000000 137600 000000 137600 000000 137600
*
0000540 000000 000000 000000 000000 000000 000000 000000 000000
*
0000600 000000 137600 000000 137600 000000 137600 000000 137600
*
0000640 000000 137600 000000 000000 000000 000000 000000 000000
0000660 000000 000000 000000 000000 000000 000000 000000 000000
0000700 000000 137600 000000 137600 000000 137600 000000 137600
*
0000740 000000 137600 000000 137600 000000 000000 000000 000000
0000760 000000 000000 000000 000000 000000 000000 000000 000000
0001000 000000 137600 000000 137600 000000 137600 000000 137600
*
0001040 000000 137600 000000 137600 000000 137600 000000 000000
0001060 000000 000000 000000 000000 000000 000000 000000 000000
0001100 000000 137600 000000 137600 000000 137600 000000 137600
*
0001160 000000 000000 000000 000000 000000 000000 000000 000000
0001200 000000 137600 000000 137600 000000 137600 000000 137600
*
0001260 000000 137600 000000 000000 000000 000000 000000 000000
0001300
EOF_EXPECTED_RESULT

echo "Running examples_programs_basin basin_query_mpi_cvms5"
if diff "$result" "$expect" > /dev/null 2>&1; then
  # Quoted: an unquoted [SUCCESS] is a glob pattern and could be replaced by
  # a matching one-character file name in the current directory.
  echo "[SUCCESS]"
else
  echo "[FAILURE]"
fi
10 changes: 10 additions & 0 deletions examples/mpi/frontier/examples/test_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
(cd basin; ./run_basin_query_mpi_cvms5.sh)
(cd basin; ./run_basin_query_mpi_complete_cencal_cvms.sh)
(cd basin; ./run_basin_query_mpi_cencal_cvms.sh)
(cd basin; ./run_basin_query_mpi_complete_cencal_cvms5.sh)
(cd basin; ./run_basin_query_mpi_complete_cencal_cvmsi.sh)
(cd basin; ./run_basin_query_mpi_cencal_cvmsi.sh)
(cd mesh; ./run_ucvm2mesh_mpi_cvmh.sh)
(cd mesh; ./run_ucvm2mesh_mpi_cvmsi.sh)
(cd mesh; ./run_ucvm2mesh_mpi_layer_cvmsi.sh)
(cd mesh; ./run_ucvm2mesh_mpi_layer_cvms.sh)
40 changes: 40 additions & 0 deletions examples/mpi/frontier/la_habra/lahabra_20m.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# List of CVMs to query
ucvmlist=cvmsi

## UCVM conf file
ucvmconf=/lustre/orion/proj-shared/geo112/pmaech/ucvm227/conf/ucvm.conf

## Gridding cell centered or vertex (CENTER, VERTEX)
gridtype=CENTER

## Spacing of cells
spacing=20.0

## Projection
proj=+proj=utm +datum=WGS84 +zone=11
rot=-39.9
x0=-118.20819
y0=33.85173
z0=0.0

## Number of cells along each dim
nx=1400
ny=1400
nz=600

## Partitioning of grid among processors (request px*py*pz processes in mpi submit)
px=25
py=25
pz=10

## Vs/Vp minimum
vp_min=0
vs_min=0

## Mesh and grid files. Meshtype must be one of the valid formats (IJK-12, IJK-20, IJK-32, SORD)
meshfile=/lustre/orion/scratch/pmaech/geo112/ucvm/mesh_cvmsi_lahabra_20m_mpi.media
gridfile=/lustre/orion/scratch/pmaech/geo112/ucvm/mesh_cvmsi_lahabra_20m_mpi.grid
meshtype=IJK-32

## Location of scratch dir
scratch=/lustre/orion/scratch/pmaech/geo112/ucvm
26 changes: 26 additions & 0 deletions examples/mpi/frontier/la_habra/lahabra_20m.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Calculations for this mesh include:
20m spacing
1400 x 1400 x 600
which is 28km x 28km x 12km
The total number of mesh points is 1.176 billion (1400 x 1400 x 600)
The processor division is
25 x 25 x 10 which is 6250
In the .sl file, we request 125 nodes, which will run 50 MPI tasks on each node (6250 / 125 = 50)

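Timing from results (each group lists the end time and the start time in Unix epoch seconds, followed by the elapsed seconds):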

cvmh
1691055953
1691053176
2777

cvmsi
1691014011
1691011348
2663


Mesh copy
1691056121
1691055953
168
22 changes: 22 additions & 0 deletions examples/mpi/frontier/la_habra/lahabra_20m.sl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Slurm batch script: build the 20 m La Habra mesh with ucvm2mesh_mpi on
# OLCF Frontier.
#
# Steps:
#   1. cd into the member work area and stage lahabra_20m.conf there.
#   2. Run ucvm2mesh_mpi on 125 nodes with 6250 tasks.  The task count has to
#      equal px*py*pz from lahabra_20m.conf (25 * 25 * 10 = 6250).
#   3. Copy the resulting .media and .grid files to the project work area.
# A Unix epoch timestamp (date +%s) is printed after "start",
# "mesh complete" and "mesh copied" so the phases can be timed from the log.
#
# Required environment: MEMBERWORK, PROJWORK.
#
#SBATCH -A geo112
#SBATCH -J lahabra_20m_mesh
#SBATCH -o %x-%j.out
#SBATCH -t 2:00:00
#SBATCH -p batch
#SBATCH -N 125
#SBATCH --threads-per-core=1
#SBATCH --mail-type=ALL
# NOTE(review): the next directive looks like a scrubbed
# "--mail-user=<address>" option; restore the real address before submitting.
#SBATCH [email protected]

proj_root="${PROJWORK:?PROJWORK must be set}/geo112/pmaech"
stage_dir="${proj_root}/test_ucvm"

echo "start"
date +"%s"

# Abort if the work directory is unavailable; otherwise the mesh job would
# run in whatever directory the job happened to start in.
cd "${MEMBERWORK:?MEMBERWORK must be set}/geo112/ucvm" || exit 1
cp "${stage_dir}/lahabra_20m.conf" ./lahabra_20m.conf || exit 1

# One logical command (backslash continuation): the -f argument must stay
# joined as "./lahabra_20m.conf".
srun -N 125 -n 6250 -c 1 --cpu-bind=threads --threads-per-core=1 \
  "${proj_root}/ucvm227/bin/ucvm2mesh_mpi" -f ./lahabra_20m.conf
echo "mesh complete"
date +"%s"

cp mesh_cvmsi_lahabra_20m_mpi.media "${stage_dir}/mesh_cvmsi_lahabra_20m_mpi.media"
cp mesh_cvmsi_lahabra_20m_mpi.grid "${stage_dir}/mesh_cvmsi_lahabra_20m_mpi.grid"
echo "mesh copied"
date +"%s"
12 changes: 12 additions & 0 deletions examples/mpi/frontier/single_node/run_basin_query_mpi_cvms5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
##
## Script for running on OLCF Frontier
##
## Sources the UCVM environment and runs basin_query_mpi for the cvms5 model,
## writing basin_query_mpi_cvms5.simple in the current directory.
##
## Environment:
##   UCVM_INSTALL_PATH  required; its conf/ucvm_env.sh is sourced and its
##                      bin/ and conf/ directories are used
##
## NOTE(review): basin_query_mpi is started directly here, so the MPI launch
## presumably comes from the caller (e.g. srun) - confirm.
##
#
if [ -z "$UCVM_INSTALL_PATH" ]; then
  # Without this guard the paths below would silently resolve to /conf and
  # /bin at the filesystem root.
  echo "Need to set UCVM_INSTALL_PATH to run >" "${0##*/}"
  exit 1
fi

echo "Starting UCVM MPI Basin Query CVMS5 test"
source "$UCVM_INSTALL_PATH/conf/ucvm_env.sh"
BIN_DIR=${UCVM_INSTALL_PATH}/bin
CONF_DIR=${UCVM_INSTALL_PATH}/conf
TEST=basin_query_mpi_cvms5
"${BIN_DIR}/basin_query_mpi" -b "${TEST}.simple" -f "${CONF_DIR}/ucvm.conf" \
  -m cvms5 -i 20 -v 2500 -l 35.0,-122.5 -s 0.1 -x 16 -y 11
echo "Completed UCVM MPI Basin Query CVMS5 test"
14 changes: 14 additions & 0 deletions examples/mpi/frontier/single_node/single_node.sl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#
# Slurm batch script: single-node UCVM MPI basin query test on OLCF Frontier.
#
# Steps:
#   1. cd into the member work area and stage test_ucvm_basin_mpi.sh there.
#   2. Run that script under srun with 2 tasks on 1 node.
#   3. Copy the resulting basin_query_mpi_cvms5.simple back to the project
#      work area.
#
# Required environment: MEMBERWORK, PROJWORK.
#
#SBATCH -A geo112
#SBATCH -J ucvm_one_node_mpi_basin_test
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --threads-per-core=1
#SBATCH --mail-type=ALL
# NOTE(review): the next directive looks like a scrubbed
# "--mail-user=<address>" option; restore the real address before submitting.
#SBATCH [email protected]

stage_dir="${PROJWORK:?PROJWORK must be set}/geo112/pmaech/test_ucvm"

# Abort if the work directory is unavailable; otherwise the test would run in
# whatever directory the job happened to start in.
cd "${MEMBERWORK:?MEMBERWORK must be set}/geo112/ucvm" || exit 1
cp "${stage_dir}/test_ucvm_basin_mpi.sh" ./test_ucvm_basin_mpi.sh || exit 1

# block:cyclic is a value for srun's --distribution option.  Given as a bare
# word, srun would take it as the program to launch instead of the script.
# NOTE(review): assumes the bare word was meant as --distribution - confirm.
srun -N1 -n2 -c1 --cpu-bind=threads --threads-per-core=1 \
  --distribution=block:cyclic ./test_ucvm_basin_mpi.sh

# The result goes back to the same geo112 project directory that the test
# script was staged from.
cp basin_query_mpi_cvms5.simple "${stage_dir}/basin_query_mpi_cvms5.simple"
40 changes: 40 additions & 0 deletions examples/mpi/frontier/titan_mesh/titan_mesh.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# List of CVMs to query
ucvmlist=cvmsi

## UCVM conf file
ucvmconf=/lustre/orion/proj-shared/geo112/pmaech/ucvm227/conf/ucvm.conf

## Gridding cell centered or vertex (CENTER, VERTEX)
gridtype=CENTER

## Spacing of cells
spacing=2000.0

## Projection
proj=+proj=utm +datum=WGS84 +zone=11
rot=-40.0
x0=-122.3
y0=34.7835
z0=0.0

## Number of cells along each dim
nx=384
ny=248
nz=25

## Partitioning of grid among processors (request px*py*pz processes in mpi submit)
px=2
py=2
pz=5

## Vs/Vp minimum
vp_min=200
vs_min=200

## Mesh and grid files. Meshtype must be one of the valid formats (IJK-12, IJK-20, IJK-32, SORD)
meshfile=/lustre/orion/scratch/pmaech/geo112/ucvm/mesh_cvmsi_sord_2000m_mpi.media
gridfile=/lustre/orion/scratch/pmaech/geo112/ucvm/mesh_cvmsi_sord_2000m_mpi.grid
meshtype=IJK-32

## Location of scratch dir
scratch=/lustre/orion/scratch/pmaech/geo112/ucvm
15 changes: 15 additions & 0 deletions examples/mpi/frontier/titan_mesh/titan_mesh.sl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Slurm batch script: build the 2000 m cvmsi test mesh with ucvm2mesh_mpi on
# OLCF Frontier.
#
# Steps:
#   1. cd into the member work area and stage titan_mesh.conf there.
#   2. Run ucvm2mesh_mpi on 2 nodes with 20 tasks.  The task count has to
#      equal px*py*pz from titan_mesh.conf (2 * 2 * 5 = 20).
#   3. Copy the resulting .media and .grid files to the project work area.
#
# Required environment: MEMBERWORK, PROJWORK.
#
#SBATCH -A geo112
#SBATCH -J titan_mesh
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 2
#SBATCH --threads-per-core=1
#SBATCH --mail-type=ALL
# NOTE(review): the next directive looks like a scrubbed
# "--mail-user=<address>" option; restore the real address before submitting.
#SBATCH [email protected]

proj_root="${PROJWORK:?PROJWORK must be set}/geo112/pmaech"
stage_dir="${proj_root}/test_ucvm"

# Abort if the work directory or the config cannot be staged; otherwise the
# mesh job would start in the wrong directory or without its config file.
cd "${MEMBERWORK:?MEMBERWORK must be set}/geo112/ucvm" || exit 1
cp "${stage_dir}/titan_mesh.conf" ./titan_mesh.conf || exit 1

srun -N 2 -n 20 -c 1 --cpu-bind=threads --threads-per-core=1 \
  "${proj_root}/ucvm227/bin/ucvm2mesh_mpi" -f ./titan_mesh.conf

cp mesh_cvmsi_sord_2000m_mpi.media "${stage_dir}/mesh_cvmsi_sord_2000m_mpi.media"
cp mesh_cvmsi_sord_2000m_mpi.grid "${stage_dir}/mesh_cvmsi_sord_2000m_mpi.grid"

0 comments on commit b655214

Please sign in to comment.