From 3302750e94643038c683ebc0e00d1473c6998a4a Mon Sep 17 00:00:00 2001 From: Andrew-S-Rosen Date: Mon, 25 Mar 2024 12:00:41 -0700 Subject: [PATCH 1/9] Fix `copy_files` support with Parsl --- tests/parsl/test_emt_recipes.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/parsl/test_emt_recipes.py b/tests/parsl/test_emt_recipes.py index dd52a44214..a356830b8b 100644 --- a/tests/parsl/test_emt_recipes.py +++ b/tests/parsl/test_emt_recipes.py @@ -6,7 +6,7 @@ from ase.build import bulk -from quacc import SETTINGS, job +from quacc import SETTINGS, flow, job from quacc.recipes.emt.core import relax_job # skipcq: PYL-C0412 from quacc.recipes.emt.slabs import bulk_to_slabs_flow # skipcq: PYL-C0412 @@ -32,16 +32,16 @@ def test_functools(tmp_path, monkeypatch, job_decorators): assert result[-1]["fmax"] == 0.1 -# def test_copy_files(tmp_path, monkeypatch): -# monkeypatch.chdir(tmp_path) -# atoms = bulk("Cu") +def test_copy_files(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + atoms = bulk("Cu") -# @flow -# def myflow(atoms): -# result1 = relax_job(atoms) -# return relax_job(result1["atoms"], copy_files={result1["dir_name"]: "opt.*"}) + @flow + def myflow(atoms): + result1 = relax_job(atoms) + return relax_job(result1["atoms"], copy_files={result1["dir_name"]: "opt.*"}) -# assert "atoms" in myflow(atoms).result() + assert "atoms" in myflow(atoms).result() def test_phonon_flow(tmp_path, monkeypatch): From 626596e71574ed2a0fcffc1ded68dbac1fa5e7ab Mon Sep 17 00:00:00 2001 From: "Andrew S. 
Rosen" Date: Mon, 25 Mar 2024 12:06:26 -0700 Subject: [PATCH 2/9] Update requirements-parsl.txt --- tests/requirements-parsl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements-parsl.txt b/tests/requirements-parsl.txt index 7cc8e244a8..f99bcb6dd2 100644 --- a/tests/requirements-parsl.txt +++ b/tests/requirements-parsl.txt @@ -1 +1 @@ -parsl[monitoring]==2024.3.18 +git+https://github.com/Parsl/parsl.git#benc-plugin-future-resolution From 8c87a9c385015b1f70302c3e5cf9edc701a8a0ae Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Mon, 25 Mar 2024 13:22:06 -0700 Subject: [PATCH 3/9] Update requirements-parsl.txt --- tests/requirements-parsl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements-parsl.txt b/tests/requirements-parsl.txt index f99bcb6dd2..bf782fc5f0 100644 --- a/tests/requirements-parsl.txt +++ b/tests/requirements-parsl.txt @@ -1 +1 @@ -git+https://github.com/Parsl/parsl.git#benc-plugin-future-resolution +git+https://github.com/Parsl/parsl.git@benc-plugin-future-resolution From ff53e12523ea0151342a564cf974f1c07b42032d Mon Sep 17 00:00:00 2001 From: "Andrew S. 
Rosen" Date: Mon, 29 Apr 2024 11:23:48 -0700 Subject: [PATCH 4/9] Update conftest.py --- tests/parsl/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/parsl/conftest.py b/tests/parsl/conftest.py index 81295056ed..6c6ad8c5e3 100644 --- a/tests/parsl/conftest.py +++ b/tests/parsl/conftest.py @@ -2,6 +2,9 @@ from pathlib import Path +from parsl.config import Config +from parsl.dataflow.dependency_resolvers import DEEP_DEPENDENCY_RESOLVER + TEST_RESULTS_DIR = Path(__file__).parent / "_test_results" TEST_SCRATCH_DIR = Path(__file__).parent / "_test_scratch" TEST_RUNINFO = Path(__file__).parent / "runinfo" @@ -16,7 +19,7 @@ def pytest_sessionstart(): import os if parsl: - parsl.load() + parsl.load(Config(dependency_resolver=DEEP_DEPENDENCY_RESOLVER)) file_dir = Path(__file__).parent os.environ["QUACC_CONFIG_FILE"] = str(file_dir / "quacc.yaml") os.environ["QUACC_RESULTS_DIR"] = str(TEST_RESULTS_DIR) From 3a56019f5e4095ee5f29cd08c0d6aeab066ec52c Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Mon, 29 Apr 2024 11:27:38 -0700 Subject: [PATCH 5/9] Update executors.md --- docs/user/wflow_engine/executors.md | 54 ++++++++++++++++------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/docs/user/wflow_engine/executors.md b/docs/user/wflow_engine/executors.md index 3c28df95d1..6106827b4e 100644 --- a/docs/user/wflow_engine/executors.md +++ b/docs/user/wflow_engine/executors.md @@ -379,6 +379,7 @@ If you haven't done so already: ```python import parsl from parsl.config import Config + from parsl.dataflow.dependency_resolvers import DEEP_DEPENDENCY_RESOLVER from parsl.executors import HighThroughputExecutor from parsl.launchers import SrunLauncher from parsl.providers import SlurmProvider @@ -394,24 +395,25 @@ If you haven't done so already: env_vars = f"export OMP_NUM_THREADS={cores_per_job},1" # (1)! config = Config( - strategy="htex_auto_scale", # (2)! + dependency_resolver=DEEP_DEPENDENCY_RESOLVER, # (2)! 
+ strategy="htex_auto_scale", # (3)! executors=[ HighThroughputExecutor( - label="quacc_parsl", # (3)! - max_workers_per_node=cores_per_node, # (4)! - cores_per_worker=cores_per_job, # (5)! + label="quacc_parsl", # (4)! + max_workers_per_node=cores_per_node, # (5)! + cores_per_worker=cores_per_job, # (6)! provider=SlurmProvider( account=account, qos="debug", constraint="cpu", - worker_init=f"source ~/.bashrc && conda activate quacc && {env_vars}", # (6)! - walltime="00:10:00", # (7)! - nodes_per_block=nodes_per_allocation, # (8)! - init_blocks=0, # (9)! - min_blocks=min_allocations, # (10)! - max_blocks=max_allocations, # (11)! - launcher=SrunLauncher(), # (12)! - cmd_timeout=60, # (13)! + worker_init=f"source ~/.bashrc && conda activate quacc && {env_vars}", # (7)! + walltime="00:10:00", # (8)! + nodes_per_block=nodes_per_allocation, # (9)! + init_blocks=0, # (10)! + min_blocks=min_allocations, # (11)! + max_blocks=max_allocations, # (12)! + launcher=SrunLauncher(), # (13)! + cmd_timeout=60, # (14)! ), ) ], @@ -422,29 +424,31 @@ If you haven't done so already: 1. Since we are running single-core jobs, we need to set the `OMP_NUM_THREADS` environment variable to "1,1" according to the [TBLite documentation](https://tblite.readthedocs.io/en/latest/tutorial/parallel.html#running-tblite-in-parallel). - 2. Unique to the `HighThroughputExecutor`, this `strategy` will automatically scale the number of active blocks (i.e. Slurm allocations) up or down based on the number of jobs remaining. We set `max_blocks=1` here so it can't scale up beyond 1 Slurm job, but it can scale down from 1 to 0 since `min_blocks=0`. By setting `init_blocks=0`, no Slurm allocation will be requested until jobs are launched. + 2. This opts in to Parsl's deep dependency resolver, which lets Parsl detect futures nested inside lists, tuples, sets, and dictionaries that are passed as task arguments. quacc needs this so that features such as `copy_files`, where the output of one job appears inside a dictionary passed to the next job, work correctly. - 3. This is just an arbitrary label for file I/O. + 3. Unique to the `HighThroughputExecutor`, this `strategy` will automatically scale the number of active blocks (i.e. 
Slurm allocations) up or down based on the number of jobs remaining. We set `max_blocks=1` here so it can't scale up beyond 1 Slurm job, but it can scale down from 1 to 0 since `min_blocks=0`. By setting `init_blocks=0`, no Slurm allocation will be requested until jobs are launched. - 4. The maximum number of running jobs per node. If you are running a non-MPI job, this value will generally be the number of physical cores per node (this example). Perlmutter has 128 physical CPU cores, so we have set a value of 128 here. + 4. This is just an arbitrary label for file I/O. - 5. The number of cores per job. We are running single-core jobs in this example. + 5. The maximum number of running jobs per node. If you are running a non-MPI job, this value will generally be the number of physical cores per node (this example). Perlmutter has 128 physical CPU cores, so we have set a value of 128 here. - 6. Any commands to run before carrying out any of the Parsl jobs. This is useful for setting environment variables, activating a given Conda environment, and loading modules. + 6. The number of cores per job. We are running single-core jobs in this example. - 7. The walltime for each block (i.e. Slurm allocation). + 7. Any commands to run before carrying out any of the Parsl jobs. This is useful for setting environment variables, activating a given Conda environment, and loading modules. - 8. The number of nodes that each block (i.e. Slurm allocation) should allocate. + 8. The walltime for each block (i.e. Slurm allocation). - 9. Sets the number of blocks (e.g. Slurm allocations) to provision during initialization of the workflow. We set this to a value of 0 so that there isn't a running Slurm job before any jobs have been submitted to Parsl. + 9. The number of nodes that each block (i.e. Slurm allocation) should allocate. - 10. Sets the minimum number of blocks (e.g. 
Slurm allocations) to maintain during [elastic resource management](https://parsl.readthedocs.io/en/stable/userguide/execution.html#elasticity). We set this to 0 so that Slurm jobs aren't running when there are no remaining jobs. + 10. Sets the number of blocks (e.g. Slurm allocations) to provision during initialization of the workflow. We set this to a value of 0 so that there isn't a running Slurm job before any jobs have been submitted to Parsl. - 11. Sets the maximum number of active blocks (e.g. Slurm allocations) during [elastic resource management](https://parsl.readthedocs.io/en/stable/userguide/execution.html#elasticity). We set this to 1 here, but it can be increased to have multiple Slurm jobs running simultaneously. Raising `max_blocks` to a larger value will allow the "htex_auto_scale" strategy to upscale resources as needed. + 11. Sets the minimum number of blocks (e.g. Slurm allocations) to maintain during [elastic resource management](https://parsl.readthedocs.io/en/stable/userguide/execution.html#elasticity). We set this to 0 so that Slurm jobs aren't running when there are no remaining jobs. - 12. The type of Launcher to use. `SrunLauncher()` will distribute jobs across the cores and nodes of the Slurm allocation. It should not be used for `PythonApp`s that themselves call MPI, which should use `SimpleLauncher()` instead. + 12. Sets the maximum number of active blocks (e.g. Slurm allocations) during [elastic resource management](https://parsl.readthedocs.io/en/stable/userguide/execution.html#elasticity). We set this to 1 here, but it can be increased to have multiple Slurm jobs running simultaneously. Raising `max_blocks` to a larger value will allow the "htex_auto_scale" strategy to upscale resources as needed. - 13. The maximum time to wait (in seconds) for the job scheduler info to be retrieved/sent. + 13. The type of Launcher to use. `SrunLauncher()` will distribute jobs across the cores and nodes of the Slurm allocation. 
It should not be used for `PythonApp`s that themselves call MPI, which should use `SimpleLauncher()` instead. + + 14. The maximum time to wait (in seconds) for the job scheduler info to be retrieved/sent. Now we define the workflow, apply it to all molecules in the "g2" collection, and monitor the progress of our calculations. @@ -779,6 +783,7 @@ First, prepare your `QUACC_VASP_PP_PATH` environment variable in the `~/.bashrc` ```python import parsl from parsl.config import Config + from parsl.dataflow.dependency_resolvers import DEEP_DEPENDENCY_RESOLVER from parsl.executors import HighThroughputExecutor from parsl.launchers import SimpleLauncher from parsl.providers import SlurmProvider @@ -795,6 +800,7 @@ First, prepare your `QUACC_VASP_PP_PATH` environment variable in the `~/.bashrc` max_allocations = 1 config = Config( + dependency_resolver=DEEP_DEPENDENCY_RESOLVER, strategy="htex_auto_scale", executors=[ HighThroughputExecutor( From f10c912c966b2ca927bc0e6d548a7b7c45d79310 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 May 2024 08:00:12 +0000 Subject: [PATCH 6/9] pre-commit auto-fixes --- tests/parsl/test_emt_recipes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parsl/test_emt_recipes.py b/tests/parsl/test_emt_recipes.py index e86c55b067..cb053eb6d6 100644 --- a/tests/parsl/test_emt_recipes.py +++ b/tests/parsl/test_emt_recipes.py @@ -6,7 +6,7 @@ from ase.build import bulk -from quacc import SETTINGS, flow, job +from quacc import flow, job from quacc.recipes.emt.core import relax_job # skipcq: PYL-C0412 from quacc.recipes.emt.slabs import bulk_to_slabs_flow # skipcq: PYL-C0412 From 9d2f23f12e6a9bafe8d4c0f24d6160e127bddaf1 Mon Sep 17 00:00:00 2001 From: "Andrew S. 
Rosen" Date: Thu, 30 May 2024 16:19:50 -0700 Subject: [PATCH 7/9] Update requirements-parsl.txt --- tests/requirements-parsl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements-parsl.txt b/tests/requirements-parsl.txt index bf782fc5f0..ede9c501c4 100644 --- a/tests/requirements-parsl.txt +++ b/tests/requirements-parsl.txt @@ -1 +1 @@ -git+https://github.com/Parsl/parsl.git@benc-plugin-future-resolution +parsl[monitoring]==2024.5.27 From e99b7e7b79bbafad7c0cffe82adfd6ead61e6adf Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Thu, 30 May 2024 16:20:11 -0700 Subject: [PATCH 8/9] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7bcf2567e3..dd31ba0ac7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ jobflow = ["jobflow[fireworks]>=0.1.14", "jobflow-remote>=0.1.0"] mlp = ["matgl>=1.0.0", "chgnet>=0.3.3", "mace-torch>=0.3.3", "torch-dftd>=0.4.0"] mp = ["pymatgen-io-validation>=0.0.1"] newtonnet = ["newtonnet>=1.1"] -parsl = ["parsl[monitoring]>=2023.10.23; platform_system!='Windows'"] +parsl = ["parsl[monitoring]>=2024.5.27; platform_system!='Windows'"] phonons = ["phonopy>=2.20.0", "seekpath>=2.1.0"] prefect = ["prefect>=2.14.14", "prefect-dask>=0.2.6", "dask-jobqueue>=0.8.2"] redun = ["redun>=0.16.2"] From 2f83840be546865698c953b1a056acb8628696aa Mon Sep 17 00:00:00 2001 From: "Andrew S. 
Rosen" Date: Thu, 30 May 2024 16:27:06 -0700 Subject: [PATCH 9/9] Update executors.md --- docs/user/wflow_engine/executors.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/user/wflow_engine/executors.md b/docs/user/wflow_engine/executors.md index f98a717d16..6d2129269a 100644 --- a/docs/user/wflow_engine/executors.md +++ b/docs/user/wflow_engine/executors.md @@ -799,15 +799,13 @@ First, prepare your `QUACC_VASP_PP_PATH` environment variable in the `~/.bashrc` vasp_parallel_cmd = ( f"srun -N {nodes_per_job} --ntasks-per-node={cores_per_node} --cpu_bind=cores" ) - min_allocations = 0 - max_allocations = 1 config = Config( dependency_resolver=DEEP_DEPENDENCY_RESOLVER, strategy="htex_auto_scale", executors=[ HighThroughputExecutor( - label="quacc_parsl", + label="quacc_mpi_parsl", max_workers_per_node=nodes_per_allocation // nodes_per_job, # (1)! cores_per_worker=1e-6, # (2)! provider=SlurmProvider( @@ -817,9 +815,6 @@ First, prepare your `QUACC_VASP_PP_PATH` environment variable in the `~/.bashrc` worker_init=f"source ~/.bashrc && conda activate quacc && module load vasp/6.4.1-cpu && export QUACC_VASP_PARALLEL_CMD='{vasp_parallel_cmd}'", walltime="00:10:00", nodes_per_block=nodes_per_allocation, - init_blocks=0, - min_blocks=min_allocations, - max_blocks=max_allocations, launcher=SimpleLauncher(), # (3)! cmd_timeout=60, ), @@ -831,7 +826,7 @@ First, prepare your `QUACC_VASP_PP_PATH` environment variable in the `~/.bashrc` parsl.load(config) ``` - 1. Unlike the prior example, here `max_workers_per_node` is defining the maximum number of concurrent jobs in total and not the maximum number of jobs run per node. + 1. Unlike the prior example, here `max_workers_per_node` is defining the maximum number of concurrent MPI jobs to run per allocation. 2. This is recommended in the Parsl manual for jobs that spawn MPI processes.